From 0ed221acb8f4af07e6aabe18f7adee4eeb430992 Mon Sep 17 00:00:00 2001
From: Karma Riuk
Date: Thu, 27 Mar 2025 14:05:17 +0100
Subject: [PATCH] added jacoco injections

---
Review notes (everything between '---' and the diffstat is ignored by git am):

* This patch was rebuilt from a copy whose newlines were collapsed and whose
  '<...>' spans were stripped. The XML tags of the Maven snippet and the
  indentation were restored; the restored text matches the line counts of the
  original hunk headers. Whitespace of context lines is a best guess.
* is_pull_good: the bot test was inverted ('!= "Bot"' rejected every pull
  opened by a human); it is now '== "Bot"'.
* only_inject_jacoco: no longer crashes with IndexError when process_pull
  returns without appending anything to an empty dataset.
* generate_coverage_report: the "not found" message names the actual build
  file instead of always saying pom.xml, the backup/restore of the build file
  uses utf-8 like the injectors do, and the exception is re-raised as is.
* Docstrings were added, hence the hunk sizes differ from the original commit.

 handlers.py      | 113 ++++++++++++++++++++++++++++++++++++++++++++++-
 pull_requests.py |  81 +++++++++++++++++++++++++++++----
 2 files changed, 184 insertions(+), 10 deletions(-)

diff --git a/handlers.py b/handlers.py
index db3c481..74a9086 100644
--- a/handlers.py
+++ b/handlers.py
@@ -104,10 +104,32 @@ class BuildHandler(ABC):
         finally:
             signal.alarm(0)  # Cancel the alarm
 
-    def generate_coverage_report(self):
+    def generate_coverage_report(self, already_injected_manually: bool = False):
+        """
+        Runs the coverage command; if it fails, injects jacoco into the build file and
+        retries once. The build file is restored if the injection or the retry fails.
+        """
         result = self.container.exec_run(self.generate_coverage_report_cmd())
         if result.exit_code != 0:
-            raise CantExecJacoco(clean_output(result.output))
+            if already_injected_manually:
+                raise CantExecJacoco(clean_output(result.output))
+
+            build_file_path = os.path.join(self.path, self.build_file)
+            if not os.path.exists(build_file_path):
+                raise CantInjectJacoco(f"{self.build_file} not found")
+            with open(build_file_path, "r", encoding="utf-8") as f:
+                og_content = f.read()
+            try:
+                self._try_to_inject_jacoco(build_file_path)
+                self.generate_coverage_report(already_injected_manually=True)
+            except (CantInjectJacoco, CantExecJacoco):
+                with open(build_file_path, "w", encoding="utf-8") as f:
+                    f.write(og_content)
+                raise
+
+    @abstractmethod
+    def _try_to_inject_jacoco(self, build_file_path: str) -> None:
+        """Adds the jacoco plugin to the build file at `build_file_path`."""
 
     def check_coverage(self, filename: str) -> Iterator[Tuple[str, float]]:
         """
@@ -261,6 +283,55 @@ class MavenHandler(BuildHandler):
         if not found_at_least_one:
             raise NoCoverageReportFound(f"Couldn't find any 'jacoco.xml' in {self.path}")
 
+    def _try_to_inject_jacoco(self, build_file_path: str) -> None:
+        """Adds the jacoco-maven-plugin to the pom.xml unless it is already there."""
+        with open(build_file_path, "r", encoding="utf-8") as f:
+            content = f.read()
+
+        if "jacoco-maven-plugin" in content:
+            return  # already present
+
+        jacoco_plugin = """
+<plugin>
+    <groupId>org.jacoco</groupId>
+    <artifactId>jacoco-maven-plugin</artifactId>
+    <version>0.8.8</version>
+    <executions>
+        <execution>
+            <goals>
+                <goal>prepare-agent</goal>
+            </goals>
+        </execution>
+        <execution>
+            <id>report</id>
+            <phase>test</phase>
+            <goals>
+                <goal>report</goal>
+            </goals>
+        </execution>
+    </executions>
+</plugin>
+"""
+
+        if "<plugins>" in content:
+            # just insert inside existing plugins
+            content = content.replace("<plugins>", f"<plugins>\n{jacoco_plugin}")
+        elif "</project>" in content:
+            # plugins section doesn't exist, create full section
+            build_block = f"""
+            <build>
+                <plugins>
+                    {jacoco_plugin}
+                </plugins>
+            </build>
+            """
+            content = content.replace("</project>", f"{build_block}\n</project>")
+        else:
+            raise CantInjectJacoco("Could not find insertion point for plugins in pom.xml")
+
+        with open(build_file_path, "w", encoding="utf-8") as f:
+            f.write(content)
+
 
 class GradleHandler(BuildHandler):
     def __init__(self, repo_path: str, build_file: str, updates: dict) -> None:
@@ -341,6 +412,40 @@ class GradleHandler(BuildHandler):
                 f"Couldn't find any 'index.html' inside any 'reports/jacoco' in {self.path}"
             )
 
+    def _try_to_inject_jacoco(self, build_file_path: str) -> None:
+        """Prepends a jacoco setup to the gradle build file unless it is already there."""
+        with open(build_file_path, "r", encoding="utf-8") as f:
+            content = f.read()
+
+        if "id 'jacoco'" in content or "apply plugin: 'jacoco'" in content:
+            return  # already present
+
+        jacoco_snippet = """
+plugins {
+    id 'jacoco'
+}
+
+jacoco {
+    toolVersion = "0.8.8"
+}
+
+test {
+    finalizedBy jacocoTestReport
+}
+
+jacocoTestReport {
+    dependsOn test
+    reports {
+        xml.required = true
+        html.required = true
+    }
+}"""
+
+        content = jacoco_snippet + "\n\n" + content
+
+        with open(build_file_path, "w", encoding="utf-8") as f:
+            f.write(content)
+
 
 class HandlerException(Exception, ABC):
     reason_for_failure = "Generic handler expection (this shouldn't appear)"
@@ -366,6 +471,10 @@ class CantExecJacoco(HandlerException):
     reason_for_failure = "Couldn't execute jacoco"
 
 
+class CantInjectJacoco(HandlerException):
+    reason_for_failure = "Couldn't inject jacoco in the build file"
+
+
 class NoCoverageReportFound(HandlerException):
     reason_for_failure = "No coverage report was found"
 
diff --git a/pull_requests.py b/pull_requests.py
index f76a8b6..3a51e58 100644
--- a/pull_requests.py
+++ b/pull_requests.py
@@ -28,10 +28,22 @@ def get_good_projects(csv_file: str) -> pd.DataFrame:
     return df.loc[(df['good_repo_for_crab'] == True) & (df['n_tests'] > 0)]
 
 
-def is_pull_good(pull: PullRequest, verbose: bool = False):
-    return pull.user.type != "Bot" and has_only_1_comment(
-        pull.get_commits(), pull.get_review_comments(), verbose=verbose
-    )
+def is_pull_good(pull: PullRequest, verbose: bool = False) -> bool:
+    """
+    A pull is good if it wasn't opened by a bot, has at most 2 review comments (the second
+    one, if any, being written by the author of the pull) and passes `has_only_1_comment`.
+    """
+    comments = pull.get_review_comments()
+    if pull.user.type == "Bot" or comments.totalCount > 2:
+        return False
+
+    if comments.totalCount == 2:
+        comment_list = list(comments)
+        second_comment = comment_list[1]
+        if second_comment.user.login != pull.user.login:
+            return False
+
+    return has_only_1_comment(pull.get_commits(), pull.get_review_comments(), verbose=verbose)
 
 
 def run_git_cmd(cmd: list[str], repo_path: str) -> subprocess.CompletedProcess:
@@ -88,8 +100,7 @@ def process_pull(
 
     try:
         diffs_after = {
-            file.filename: file.patch
-            for file in repo.compare(first_commit.sha, last_commit.sha).files
+            file.filename: file.patch for file in repo.compare(first_commit.sha, last_commit.sha).files
         }
     except GithubException as e:
         return
@@ -254,6 +265,53 @@ def process_repos(
                     pbar.update(1)
 
 
+def only_inject_jacoco(
+    dataset: Dataset,
+    repos_dir: str,
+    cache: dict[str, dict[int, DatasetEntry]] = {},
+):
+    """
+    Goes through the cached entries: those that failed with "Couldn't execute jacoco" are
+    processed again (so that jacoco gets injected), all the others are copied as they are
+    in `dataset`. Relies on the globals `g` and `args` set in the `__main__` block.
+    """
+    n_successfull_injections = 0
+    n_tried_injections = 0
+    with tqdm(cache, desc="Processing repos (only for injection") as top_bar:
+        for repo_name in top_bar:
+            top_bar.set_postfix(
+                {
+                    "# successfull injections": f"{n_successfull_injections}/{n_tried_injections} ({n_successfull_injections/n_tried_injections if n_tried_injections > 0 else 0:.2%})"
+                }
+            )
+            with tqdm(total=len(cache[repo_name]), desc=f"Processing prs", leave=False) as pbar:
+                # extracting keys so that it doesn't get messy as I pop elements from the dict
+                pr_numbers = list(cache[repo_name].keys())
+                for pr_number in pr_numbers:
+                    pbar.set_postfix({"repo": repo_name, "pr": pr_number})
+
+                    entry = cache[repo_name].pop(pr_number)
+                    if entry.metadata.reason_for_failure != "Couldn't execute jacoco":
+                        dataset.entries.append(entry)
+                        dataset.to_json(args.output)
+                        pbar.update(1)
+                        continue
+
+                    n_tried_injections += 1
+                    repo = g.get_repo(repo_name)
+                    pull = repo.get_pull(pr_number)
+                    process_pull(repo, pull, dataset, repos_dir, cache)
+                    pbar.update(1)
+                    last_metadata = dataset.entries[-1].metadata if dataset.entries else None
+                    if (
+                        last_metadata is not None
+                        and last_metadata.repo == repo_name
+                        and last_metadata.pr_number == pr_number
+                        and last_metadata.successful
+                    ):
+                        n_successfull_injections += 1
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Creates the triplets for the CRAB dataset.')
     parser.add_argument(
@@ -287,6 +345,11 @@ if __name__ == "__main__":
         type=str,
         help="If this argument is not provided, all the repos in the '--repos' csv will be processed. If instead you want to run the script on a single repo (for testing purposes mainly) provide a string of form 'XXX/YYY' to this argument, where XXX is the owner of the repo and YYY is the name of the repo",
     )
+    parser.add_argument(
+        "--only-inject-jacoco",
+        action="store_true",
+        help="You must provide a cache with --cache. It will take that cache and go through all the entries that failed because they couldn't execute jacoco and process them again, trying to inject jacoco manually",
+    )
 
     args = parser.parse_args()
     g = Github(os.environ["GITHUB_AUTH_TOKEN_CRAB"])
@@ -306,7 +369,9 @@ if __name__ == "__main__":
 
     dataset = Dataset()
     try:
-        # try and finally to save, regardless of an error occuring or the program finished correctly
-        process_repos(df, dataset, args.repos, cache)
+        if args.only_inject_jacoco:
+            only_inject_jacoco(dataset, args.repos, cache)
+        else:
+            process_repos(df, dataset, args.repos, cache)
     finally:
         dataset.to_json(args.output)