now using the build handler to check for compilation at the commit of merge
Karma Riuk
2025-03-17 09:22:32 +01:00
parent 082f6c6f71
commit 2fad232dd2


@@ -1,4 +1,4 @@
-import argparse, os, subprocess
+import argparse, os, subprocess, docker
 from typing import Optional
 from github.PullRequest import PullRequest
 from github.Repository import Repository
@@ -8,7 +8,8 @@ from tqdm import tqdm
 from datetime import datetime
 from dataset import Dataset, DatasetEntry, FileData, Metadata, Diff
-from utils import has_only_1_comment, move_github_logging_to_file
+from handlers import FailedToCompileError, FailedToTestError, NoTestsFoundError, NoTestResultsToExtractError, get_build_handler
+from utils import has_only_1_comment, move_github_logging_to_file, clone
 
 def get_good_projects(csv_file: str) -> pd.DataFrame:
@@ -72,15 +73,64 @@ def process_pull(repo: Repository, pr: PullRequest, dataset: Dataset, repos_dir:
     comment_text = comments[0].body if comments else ""
     diffs_after = [Diff(file.filename, file.patch) for file in repo.compare(first_commit.sha, last_commit.sha).files]
 
-    dataset.entries.append(DatasetEntry(
-        metadata=Metadata(repo.full_name, pr.number, pr.merge_commit_sha),
+    entry = DatasetEntry(
+        metadata=Metadata(repo.full_name, pr.number, pr.merge_commit_sha, True),
         files=[FileData(file.filename) for file in pr.get_files()],
         diffs_before=diffs_before,
         comment=comment_text,
         diffs_after=diffs_after,
-    ))
+    )
+    dataset.entries.append(entry)
+
+    repo_path = os.path.join(repos_dir, repo.full_name)
+
+    updates = {}
+    if not clone(repo.full_name, repos_dir, updates):
+        entry.metadata.last_cmd_error_msg = updates["error_msg"]
+        entry.metadata.reason_for_failure = "Couldn't clone the repo successfully"
+        entry.metadata.successful = False
+
+    try:
+        # capture stderr as text so e.stderr below is a string, not None
+        subprocess.run(
+            ["git", "-C", repo_path, "checkout", pr.merge_commit_sha],
+            check=True, capture_output=True, text=True,
+        )
+    except subprocess.CalledProcessError as e:
+        entry.metadata.last_cmd_error_msg = e.stderr
+        entry.metadata.reason_for_failure = f"Couldn't checkout the commit '{pr.merge_commit_sha}'"
+        entry.metadata.successful = False
+        return
+
+    build_handler = get_build_handler(repos_dir, repo.full_name, updates)
+    if build_handler is None:
+        entry.metadata.last_cmd_error_msg = updates["error_msg"]
+        entry.metadata.reason_for_failure = "Couldn't get the build handler"
+        entry.metadata.successful = False
+        return
+
+    steps = [
+        ("Checking for tests...", build_handler.check_for_tests),
+        ("Compiling...", build_handler.compile_repo),
+        ("Running tests...", build_handler.test_repo),
+    ]
+    error_map = {
+        NoTestsFoundError: "No tests found",
+        FailedToCompileError: "Failed to compile",
+        FailedToTestError: "Failed to test",
+        NoTestResultsToExtractError: "Failed to extract test results",
+    }
+    with build_handler, tqdm(total=len(steps), desc="Processing PR", leave=False) as pbar:
+        try:
+            for message, action in steps:
+                pbar.set_postfix_str(message)
+                action()
+                pbar.update(1)
+        except tuple(error_map) as e:
+            entry.metadata.last_cmd_error_msg = str(e)
+            entry.metadata.reason_for_failure = error_map[type(e)]
+            entry.metadata.successful = False
+        finally:
+            build_handler.clean_repo()
 
 def process_repo(repo_name: str, stats_df: Optional[pd.DataFrame], dataset: Dataset, repos_dir: str):
     good_prs = []
@@ -135,11 +185,12 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     g = Github(os.environ["GITHUB_AUTH_TOKEN_CRAB"])
+    docker_client = docker.from_env()
     move_github_logging_to_file()
 
     dataset = Dataset()
     try:
         # try and finally to save, regardless of an error occurring or the program finishing correctly
-        process_repos(args.csv_file, args.stats, args.repos, dataset)
+        process_repos(args.csv_file, args.stats, dataset, args.repos)
     finally:
         dataset.to_json(args.output)
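
For readers skimming this commit, the calls above imply a build-handler interface roughly like the sketch below. This is a hedged reconstruction, not the actual handlers module: only get_build_handler, the context-manager use, the four exception names, and the methods check_for_tests / compile_repo / test_repo / clean_repo appear in the diff; the bodies and comments are assumptions.

# Hedged sketch of the handlers API implied by this commit; the real
# module may differ. Only the names used in the diff are taken from it.
from typing import Optional

class NoTestsFoundError(Exception): pass
class FailedToCompileError(Exception): pass
class FailedToTestError(Exception): pass
class NoTestResultsToExtractError(Exception): pass

class BuildHandler:
    def __enter__(self) -> "BuildHandler":
        # assumption: sets up the build environment (plausibly a docker
        # container, given the new docker.from_env() call in main)
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        pass  # tear down whatever __enter__ set up

    def check_for_tests(self) -> None:
        """Raises NoTestsFoundError when the repo has no test sources."""

    def compile_repo(self) -> None:
        """Raises FailedToCompileError when the build fails."""

    def test_repo(self) -> None:
        """Raises FailedToTestError or NoTestResultsToExtractError."""

    def clean_repo(self) -> None:
        """Restores the working tree after the build (always runs, via finally)."""

def get_build_handler(root: str, repo: str, updates: dict) -> Optional[BuildHandler]:
    # assumption: detects the project's build system and returns a matching
    # handler, or None after recording the problem in updates["error_msg"]
    updates["error_msg"] = "no supported build file found"
    return None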
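One idiom in process_pull worth calling out: an except clause accepts any expression evaluating to an exception class or a tuple of classes, so except tuple(error_map) catches exactly the four handler errors (the dict's keys), and error_map[type(e)] maps the concrete class back to a human-readable reason. A minimal self-contained demo, where FooError and BarError are hypothetical stand-ins for the handler exceptions:

# Self-contained demo of the except-tuple(dict) idiom from process_pull.
class FooError(Exception): pass
class BarError(Exception): pass

error_map = {
    FooError: "foo failed",
    BarError: "bar failed",
}

def run(action) -> str:
    try:
        action()
    except tuple(error_map) as e:   # tuple of the dict's keys, i.e. (FooError, BarError)
        return error_map[type(e)]   # exact-class lookup: subclasses would raise KeyError
    return "ok"

def boom():
    raise BarError("something broke")

print(run(boom))          # -> "bar failed"
print(run(lambda: None))  # -> "ok"

Note the exact-class lookup: if the handlers ever raise a subclass of one of these errors, type(e) would miss the dict key, which is fine here because the except clause only admits the four exact classes anyway.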