diff --git a/handlers.py b/handlers.py
index db3c481..74a9086 100644
--- a/handlers.py
+++ b/handlers.py
@@ -104,10 +104,28 @@ class BuildHandler(ABC):
finally:
signal.alarm(0) # Cancel the alarm
def generate_coverage_report(self, already_injected_manually: bool = False):
    """Generate the coverage report in the container, injecting JaCoCo if needed.

    Runs ``self.generate_coverage_report_cmd()`` inside ``self.container``.
    When the command exits with a non-zero code and no injection has been
    attempted yet, the build file is snapshotted, ``_try_to_inject_jacoco``
    is asked to modify it, and the command is retried exactly once.

    If the injection or the retry fails, the build file is restored to its
    original bytes before the exception propagates. When the retry succeeds
    the injected build file is left in place.

    Args:
        already_injected_manually: ``True`` on the retry that follows an
            injection attempt; a failure is then reported instead of
            triggering another injection.

    Raises:
        CantExecJacoco: the command fails and injection was already tried.
        CantInjectJacoco: the build file is missing or cannot be modified.
    """
    result = self.container.exec_run(self.generate_coverage_report_cmd())
    if result.exit_code == 0:
        return

    if already_injected_manually:
        raise CantExecJacoco(clean_output(result.output))

    build_file_path = os.path.join(self.path, self.build_file)
    if not os.path.exists(build_file_path):
        # This method is shared by every handler, so name the actual build
        # file instead of assuming a Maven pom.xml.
        raise CantInjectJacoco(f"{self.build_file} not found")

    # Snapshot raw bytes so the restore is exact whatever the file encoding is.
    with open(build_file_path, "rb") as f:
        og_content = f.read()

    try:
        self._try_to_inject_jacoco(build_file_path)
        self.generate_coverage_report(already_injected_manually=True)
    except Exception:
        # Any failure (not only the two handler exceptions) must leave the
        # repository's build file exactly as it was found.
        with open(build_file_path, "wb") as f:
            f.write(og_content)
        raise
+
@abstractmethod
def _try_to_inject_jacoco(self, build_file_path: str) -> None:
    """Modify the build file at ``build_file_path`` so that it enables JaCoCo.

    Concrete handlers supply the build-tool specific edit. It is invoked by
    ``generate_coverage_report`` after a failed coverage run, and that caller
    handles ``CantInjectJacoco`` raised from here by restoring the file.
    """
def check_coverage(self, filename: str) -> Iterator[Tuple[str, float]]:
"""
@@ -261,6 +279,54 @@ class MavenHandler(BuildHandler):
if not found_at_least_one:
raise NoCoverageReportFound(f"Couldn't find any 'jacoco.xml' in {self.path}")
def _try_to_inject_jacoco(self, build_file_path: str) -> None:
    """Insert the ``jacoco-maven-plugin`` declaration into a Maven POM, in place.

    The file is left untouched when it already mentions
    ``jacoco-maven-plugin``. Otherwise the plugin is added right after every
    ``<plugins>`` opening tag; when the POM has no ``<plugins>`` section, a
    new ``<build><plugins>`` section is placed just before ``</project>``.

    Args:
        build_file_path: path of the ``pom.xml`` to rewrite.

    Raises:
        CantInjectJacoco: the POM has neither ``<plugins>`` nor ``</project>``.
    """
    with open(build_file_path, "r", encoding="utf-8") as f:
        content = f.read()

    if "jacoco-maven-plugin" in content:
        return  # already present

    jacoco_plugin = """
<plugin>
    <groupId>org.jacoco</groupId>
    <artifactId>jacoco-maven-plugin</artifactId>
    <version>0.8.8</version>
    <executions>
        <execution>
            <goals>
                <goal>prepare-agent</goal>
            </goals>
        </execution>
        <execution>
            <id>report</id>
            <phase>test</phase>
            <goals>
                <goal>report</goal>
            </goals>
        </execution>
    </executions>
</plugin>
"""

    # The anchors must be real tags: an empty string is contained in every
    # string, and str.replace("", x) would insert x between all characters.
    if "<plugins>" in content:
        # just insert inside existing plugins
        content = content.replace("<plugins>", f"<plugins>\n{jacoco_plugin}")
    elif "</project>" in content:
        # plugins section doesn't exist, create full section
        build_block = f"""
<build>
<plugins>
{jacoco_plugin}
</plugins>
</build>
"""
        content = content.replace("</project>", f"{build_block}\n</project>")
    else:
        raise CantInjectJacoco("Could not find insertion point for plugins in pom.xml")

    with open(build_file_path, "w", encoding="utf-8") as f:
        f.write(content)
+
class GradleHandler(BuildHandler):
def __init__(self, repo_path: str, build_file: str, updates: dict) -> None:
@@ -341,6 +407,39 @@ class GradleHandler(BuildHandler):
f"Couldn't find any 'index.html' inside any 'reports/jacoco' in {self.path}"
)
def _try_to_inject_jacoco(self, build_file_path: str) -> None:
    """Append the JaCoCo plugin and its report configuration to a Gradle build file.

    The file is left untouched when it already applies the ``jacoco`` plugin
    (single- or double-quoted ``id``, ``id(...)`` call, or ``apply plugin:``).

    The snippet is appended at the end and uses ``apply plugin:`` rather than
    a ``plugins {}`` block. Gradle only allows ``buildscript {}``,
    ``pluginManagement {}`` and other ``plugins {}`` blocks before a
    ``plugins {}`` block, so prepending configuration statements would break
    any script that declares its own. Appending also means ``test`` is
    configured after the script's own plugins have been applied.

    Args:
        build_file_path: path of the Groovy-DSL build script to rewrite.
    """
    with open(build_file_path, "r", encoding="utf-8") as f:
        content = f.read()

    applied_markers = (
        "id 'jacoco'",
        'id "jacoco"',
        "id('jacoco')",
        'id("jacoco")',
        "apply plugin: 'jacoco'",
        'apply plugin: "jacoco"',
    )
    if any(marker in content for marker in applied_markers):
        return  # already present

    jacoco_snippet = """apply plugin: 'jacoco'

jacoco {
    toolVersion = "0.8.8"
}

test {
    finalizedBy jacocoTestReport
}

jacocoTestReport {
    dependsOn test
    reports {
        xml.required = true
        html.required = true
    }
}"""

    existing = content.rstrip()
    separator = "\n\n" if existing else ""
    content = f"{existing}{separator}{jacoco_snippet}\n"

    with open(build_file_path, "w", encoding="utf-8") as f:
        f.write(content)
+
class HandlerException(Exception, ABC):
reason_for_failure = "Generic handler expection (this shouldn't appear)"
@@ -366,6 +465,10 @@ class CantExecJacoco(HandlerException):
reason_for_failure = "Couldn't execute jacoco"
class CantInjectJacoco(HandlerException):
    """Raised when JaCoCo could not be added to a project's build file."""

    reason_for_failure = "Couldn't inject jacoco in the build file"
+
+
class NoCoverageReportFound(HandlerException):
reason_for_failure = "No coverage report was found"
diff --git a/pull_requests.py b/pull_requests.py
index f76a8b6..3a51e58 100644
--- a/pull_requests.py
+++ b/pull_requests.py
@@ -28,10 +28,18 @@ def get_good_projects(csv_file: str) -> pd.DataFrame:
return df.loc[(df['good_repo_for_crab'] == True) & (df['n_tests'] > 0)]
def is_pull_good(pull: PullRequest, verbose: bool = False) -> bool:
    """Tell whether a pull request is a usable candidate.

    A pull request is rejected when it was opened by a bot, when it has more
    than two review comments, or when it has exactly two review comments and
    the second one was not written by the pull request's author. Anything
    else is decided by ``has_only_1_comment``.

    Args:
        pull: the pull request to inspect.
        verbose: forwarded to ``has_only_1_comment``.

    Returns:
        ``True`` when the pull request passes every check, ``False`` otherwise.
    """
    # Bot-authored pull requests are rejected outright (human-authored ones
    # must pass through, as they did before the two-comment rule was added).
    if pull.user.type == "Bot":
        return False

    comments = pull.get_review_comments()
    n_comments = comments.totalCount
    if n_comments > 2:
        return False

    # A second comment is only tolerated when it is the author's own reply.
    if n_comments == 2 and comments[1].user.login != pull.user.login:
        return False

    # Reuse the comments fetched above instead of requesting them again.
    return has_only_1_comment(pull.get_commits(), comments, verbose=verbose)
def run_git_cmd(cmd: list[str], repo_path: str) -> subprocess.CompletedProcess:
@@ -88,8 +96,7 @@ def process_pull(
try:
diffs_after = {
- file.filename: file.patch
- for file in repo.compare(first_commit.sha, last_commit.sha).files
+ file.filename: file.patch for file in repo.compare(first_commit.sha, last_commit.sha).files
}
except GithubException as e:
return
@@ -254,6 +261,48 @@ def process_repos(
pbar.update(1)
def only_inject_jacoco(
    dataset: Dataset,
    repos_dir: str,
    cache: dict[str, dict[int, DatasetEntry]] = {},
):
    """Re-process only the cached entries that failed because jacoco couldn't run.

    Every cached entry is popped from ``cache``. Entries whose failure reason
    is anything other than "Couldn't execute jacoco" are copied to ``dataset``
    unchanged, and the dataset is saved. The others are handed to
    ``process_pull`` again, and an attempt counts as successful when the
    entry that ends up last in ``dataset`` belongs to the same repo and pull
    request and is marked successful.

    Relies on the module-level ``args`` (for ``args.output``) and ``g`` (the
    GitHub client) that the ``__main__`` section creates.

    Args:
        dataset: receives the carried-over and the re-processed entries.
        repos_dir: forwarded to ``process_pull``.
        cache: previous results, as repo name -> pull request number -> entry.
            The default is only ever iterated, never mutated, by this function.
    """
    n_successfull_injections = 0
    n_tried_injections = 0
    with tqdm(cache, desc="Processing repos (only for injection") as top_bar:
        for repo_name in top_bar:
            success_rate = (
                n_successfull_injections / n_tried_injections if n_tried_injections > 0 else 0
            )
            top_bar.set_postfix(
                {
                    "# successfull injections": f"{n_successfull_injections}/{n_tried_injections} ({success_rate:.2%})"
                }
            )
            # Fetched lazily and once per repository: repositories without any
            # jacoco failure never need it.
            repo = None
            with tqdm(total=len(cache[repo_name]), desc="Processing prs", leave=False) as pbar:
                # extracting keys so that it doesn't get messy as I pop elements from the dict
                pr_numbers = list(cache[repo_name])
                for pr_number in pr_numbers:
                    pbar.set_postfix({"repo": repo_name, "pr": pr_number})

                    entry = cache[repo_name].pop(pr_number)
                    if entry.metadata.reason_for_failure != "Couldn't execute jacoco":
                        dataset.entries.append(entry)
                        dataset.to_json(args.output)
                        pbar.update(1)
                        continue

                    n_tried_injections += 1
                    if repo is None:
                        repo = g.get_repo(repo_name)
                    pull = repo.get_pull(pr_number)
                    process_pull(repo, pull, dataset, repos_dir, cache)
                    pbar.update(1)

                    # The dataset may still be empty here, so there is no last
                    # entry to inspect.
                    if not dataset.entries:
                        continue
                    last_metadata = dataset.entries[-1].metadata
                    if (
                        last_metadata.repo == repo_name
                        and last_metadata.pr_number == pr_number
                        and last_metadata.successful
                    ):
                        n_successfull_injections += 1
+
+
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Creates the triplets for the CRAB dataset.')
parser.add_argument(
@@ -287,6 +336,11 @@ if __name__ == "__main__":
type=str,
help="If this argument is not provided, all the repos in the '--repos' csv will be processed. If instead you want to run the script on a single repo (for testing purposes mainly) provide a string of form 'XXX/YYY' to this argument, where XXX is the owner of the repo and YYY is the name of the repo",
)
+ parser.add_argument(
+ "--only-inject-jacoco",
+ action="store_true",
+ help="You must provide a cache with --cache. It will take that cache and go through all the entries that failed because they couldn't execute jacoco and process them again, trying to inject jacoco manually",
+ )
args = parser.parse_args()
g = Github(os.environ["GITHUB_AUTH_TOKEN_CRAB"])
@@ -306,7 +360,9 @@ if __name__ == "__main__":
dataset = Dataset()
try:
- # try and finally to save, regardless of an error occuring or the program finished correctly
- process_repos(df, dataset, args.repos, cache)
+ if args.only_inject_jacoco:
+ only_inject_jacoco(dataset, args.repos, cache)
+ else:
+ process_repos(df, dataset, args.repos, cache)
finally:
dataset.to_json(args.output)