Mirror of https://github.com/karma-riuk/crab.git (synced 2025-07-04 21:28:12 +02:00)
now using only the new dataset version
@@ -131,7 +131,7 @@ class Dataset_new:
             json.dump(self, f, default=lambda o: o.__dict__, indent=4)
 
     @staticmethod
-    def from_json(filename: str, keep_still_in_progress: bool = False) -> "Dataset":
+    def from_json(filename: str, keep_still_in_progress: bool = False) -> "Dataset_new":
         raise NotImplementedError("This method is not implemented yet")
 
 
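The serialization above leans entirely on json.dump's default hook instead of per-class to_dict methods. A self-contained sketch of what `default=lambda o: o.__dict__` does with nested objects (the Entry_demo/FileData_demo classes are toy stand-ins, not part of the repo):

    import json

    # Hypothetical toy classes, only to illustrate the pattern: json.dump falls back to
    # an object's attribute dict whenever it meets something that isn't natively
    # JSON-serializable, so nested objects serialize recursively with no extra code.
    class FileData_demo:
        def __init__(self, filename: str):
            self.filename = filename

    class Entry_demo:
        def __init__(self):
            self.files = {"A.java": FileData_demo("A.java")}

    print(json.dumps(Entry_demo(), default=lambda o: o.__dict__, indent=4))
    # prints: {"files": {"A.java": {"filename": "A.java"}}} (pretty-printed over 7 lines)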
errors.py (new file, 45 lines)
@@ -0,0 +1,45 @@
+from abc import ABC
+
+
+class SetupException(Exception, ABC):
+    reason_for_failure: str
+
+
+class NoDiffsBeforeError(SetupException):
+    reason_for_failure = "Couldn't get the diffs before the first commit"
+
+
+class NoDiffsAfterError(SetupException):
+    reason_for_failure = "Couldn't get the diffs after the last commit"
+
+
+class CantCloneRepoError(SetupException):
+    reason_for_failure = "Couldn't clone the repository"
+
+
+class CantEnsureFullHistoryError(SetupException):
+    reason_for_failure = "Couldn't ensure the full history of the repo (fetch --unshallow)"
+
+
+class CantFetchPRError(SetupException):
+    reason_for_failure = "Couldn't fetch the PR's merge commit"
+
+
+class CantCheckoutCommitError(SetupException):
+    reason_for_failure = (
+        "Coudln't checkout the PR's merge commit (even after fetching the pull/<pr_number>/merge)"
+    )
+
+
+class MultipleFilesError(SetupException):
+    reason_for_failure = (
+        "When requesting the contents of a file, a list of ContentFile was returned"
+    )
+
+
+class NotValidDirectory(SetupException):
+    reason_for_failure = "The directory is not valid"
+
+
+class CantFindBuildFile(SetupException):
+    reason_for_failure = "Couldn't find the build file in the directory"
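Each SetupException subclass pins a fixed, human-readable reason_for_failure at class level, while the exception message itself can carry the raw error output. A minimal usage sketch (failing_clone is hypothetical; only the exception classes come from errors.py):

    from errors import SetupException, CantCloneRepoError

    def failing_clone() -> None:
        # pretend the clone step failed and attach the raw stderr as the message
        raise CantCloneRepoError("git clone exited with status 128")

    try:
        failing_clone()
    except SetupException as e:
        print(e.reason_for_failure)  # "Couldn't clone the repository" (stable label)
        print(str(e))                # "git clone exited with status 128" (raw detail)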
handlers.py (85 changed lines)
@@ -1,10 +1,12 @@
 from abc import ABC, abstractmethod
-import os, re, docker, signal, sys, javalang
+import os, re, docker, signal, javalang
 from bs4 import BeautifulSoup
-from typing import Iterable, Optional, Tuple, Iterator
+from typing import Iterable, Tuple, Iterator
 import xml.etree.ElementTree as ET
 from javalang.tree import PackageDeclaration
 
+from errors import CantFindBuildFile, NotValidDirectory
+
 REPORT_SIZE_THRESHOLD = 400  # less than 400 bytes (charcaters), we don't care about it
 
 
@@ -77,7 +79,9 @@ class BuildHandler(ABC):
             raise FailedToCompileError(output)
         except TimeoutError:
             self.updates["compiled_successfully"] = False
-            self.updates["error_msg"] = "Compile process killed due to exceeding the 1-hour time limit"
+            self.updates[
+                "error_msg"
+            ] = "Compile process killed due to exceeding the 1-hour time limit"
         finally:
             signal.alarm(0)  # Cancel the alarm
 
@@ -135,7 +139,9 @@ class BuildHandler(ABC):
         candidates = []
         for coverage_report_path in self.get_jacoco_report_paths():
             if not os.path.exists(coverage_report_path):
-                raise NoCoverageReportFound(f"Coverage report file '{coverage_report_path}' does not exist")
+                raise NoCoverageReportFound(
+                    f"Coverage report file '{coverage_report_path}' does not exist"
+                )
 
             fully_qualified_class = self._extract_fully_qualified_class(filename)
             candidates.append({"report_file": coverage_report_path, "fqc": fully_qualified_class})
@@ -220,7 +226,7 @@ class BuildHandler(ABC):
 
 
 class MavenHandler(BuildHandler):
-    def __init__(self, repo_path: str, build_file: str, updates: dict) -> None:
+    def __init__(self, repo_path: str, build_file: str, updates: dict = {}) -> None:
         super().__init__(repo_path, build_file, updates)
         self.base_cmd = "mvn -B -Dstyle.color=never -Dartifact.download.skip=true"
         # -B (Batch Mode): Runs Maven in non-interactive mode, reducing output and removing download progress bars.
@@ -265,7 +271,9 @@ class MavenHandler(BuildHandler):
             self.updates["n_tests_failed"] += failures
             self.updates["n_tests_errors"] += errors
             self.updates["n_tests_skipped"] += skipped
-            self.updates["n_tests_passed"] += tests_run - (failures + errors)  # Calculate passed tests
+            self.updates["n_tests_passed"] += tests_run - (
+                failures + errors
+            )  # Calculate passed tests
 
     def get_jacoco_report_paths(self) -> Iterable[str]:
         found_at_least_one = False
@@ -329,7 +337,7 @@ class MavenHandler(BuildHandler):
 
 
 class GradleHandler(BuildHandler):
-    def __init__(self, repo_path: str, build_file: str, updates: dict) -> None:
+    def __init__(self, repo_path: str, build_file: str, updates: dict = {}) -> None:
         super().__init__(repo_path, build_file, updates)
         self.base_cmd = "gradle --no-daemon --console=plain"
 
@@ -360,7 +368,9 @@ class GradleHandler(BuildHandler):
 
         test_results_path = os.path.join(self.path, "build/reports/tests/test/index.html")
         if not os.path.exists(test_results_path):
-            raise NoTestResultsToExtractError("No test results found (prolly a repo with sub-projects)")
+            raise NoTestResultsToExtractError(
+                "No test results found (prolly a repo with sub-projects)"
+            )
 
         # Load the HTML file
         with open(test_results_path, "r") as file:
@@ -374,7 +384,9 @@ class GradleHandler(BuildHandler):
         # counter_div = test_div.find("div", class_="counter")
         counter_div = test_div.select_one("div.counter")
         if counter_div is None:
-            raise NoTestResultsToExtractError("No test results found (not div.counter for tests)")
+            raise NoTestResultsToExtractError(
+                "No test results found (not div.counter for tests)"
+            )
 
         self.updates["n_tests"] = int(counter_div.text.strip())
 
@@ -386,12 +398,16 @@ class GradleHandler(BuildHandler):
         # counter_div = failures_div.find("div", class_="counter")
         counter_div = failures_div.select_one("div.counter")
         if counter_div is None:
-            raise NoTestResultsToExtractError("No test results found (not div.counter for failures)")
+            raise NoTestResultsToExtractError(
+                "No test results found (not div.counter for failures)"
+            )
 
         self.updates["n_tests_failed"] = int(counter_div.text.strip())
 
         # Calculate passed tests
-        self.updates["n_tests_passed"] = self.updates["n_tests"] - self.updates["n_tests_failed"]
+        self.updates["n_tests_passed"] = (
+            self.updates["n_tests"] - self.updates["n_tests_failed"]
+        )
 
     def get_jacoco_report_paths(self) -> Iterable[str]:
         found_at_least_one = False
@@ -577,7 +593,7 @@ def get_coverage_for_file(xml_file: str, target_fully_qualified_class: str, base
         return -1
 
 
-def get_build_handler(root: str, repo: str, updates: dict, verbose: bool = False) -> Optional[BuildHandler]:
+def get_build_handler(root: str, repo: str, verbose: bool = False) -> BuildHandler:
     """
     Get the path to the build file of a repository. The build file is either a
     `pom.xml`, `build.gradle`, or `build.xml` file.
@@ -592,38 +608,33 @@ def get_build_handler(root: str, repo: str, updates: dict, verbose: bool = False
     path = os.path.join(root, repo)
     # Check if the given path is a directory
     if not os.path.isdir(path):
-        error_msg = f"The path {path} is not a valid directory."
-        print(error_msg, file=sys.stderr)
-        updates["error_msg"] = error_msg
-        return None
+        raise NotValidDirectory(f"The path {path} is not a valid directory.")
 
     to_keep = ["pom.xml", "build.gradle"]
     for entry in os.scandir(path):
         if entry.is_file() and entry.name in to_keep:
             if verbose:
                 print(f"Found {entry.name} in {repo} root, so keeping it and returning")
-            updates["depth_of_build_file"] = 0
             if entry.name == "build.gradle":
-                updates["build_system"] = "gradle"
-                return GradleHandler(path, entry.name, updates)
+                return GradleHandler(path, entry.name)
             else:
-                updates["build_system"] = "maven"
-                return MavenHandler(path, entry.name, updates)
+                return MavenHandler(path, entry.name)
 
-    # List files in the immediate subdirectories
-    for entry in os.scandir(path):
-        if entry.is_dir():
-            for sub_entry in os.scandir(entry.path):
-                if sub_entry.is_file() and sub_entry.name in to_keep:
-                    if verbose:
-                        print(f"Found {sub_entry.name} in {repo} first level, so keeping it and returning")
-                    updates["depth_of_build_file"] = 1
-                    if entry.name == "build.gradle":
-                        updates["build_system"] = "gradle"
-                        return GradleHandler(path, os.path.join(entry.name, sub_entry.name), updates)
-                    else:
-                        updates["build_system"] = "maven"
-                        return MavenHandler(path, os.path.join(entry.name, sub_entry.name), updates)
+    raise CantFindBuildFile(f"Couldn't find any of {to_keep} build files in the directory '{path}'")
+    # # List files in the immediate subdirectories
+    # for entry in os.scandir(path):
+    #     if entry.is_dir():
+    #         for sub_entry in os.scandir(entry.path):
+    #             if sub_entry.is_file() and sub_entry.name in to_keep:
+    #                 if verbose:
+    #                     print(f"Found {sub_entry.name} in {repo} first level, so keeping it and returning")
+    #                 updates["depth_of_build_file"] = 1
+    #                 if entry.name == "build.gradle":
+    #                     updates["build_system"] = "gradle"
+    #                     return GradleHandler(path, os.path.join(entry.name, sub_entry.name), updates)
+    #                 else:
+    #                     updates["build_system"] = "maven"
+    #                     return MavenHandler(path, os.path.join(entry.name, sub_entry.name), updates)
 
-    updates["error_msg"] = "No build file found"
-    return None
+    # updates["error_msg"] = "No build file found"
+    # return None
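With this change, get_build_handler no longer takes an `updates` dict and no longer returns None on failure; it raises NotValidDirectory or CantFindBuildFile (both SetupException subclasses) instead. A hedged sketch of how a caller might adapt (the wrapper name is made up, only the imports reflect the repo):

    from errors import SetupException
    from handlers import get_build_handler

    def build_handler_or_reason(root: str, repo: str):
        # Returns (handler, None) on success, (None, reason) when setup fails.
        try:
            handler = get_build_handler(root, repo)
        except SetupException as e:
            return None, e.reason_for_failure
        return handler, None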
pull_requests.py (308 changed lines)
@@ -1,6 +1,8 @@
 from collections import defaultdict
 import argparse, os, subprocess, docker
 from typing import Any, Callable
+from github.Commit import Commit
+from github.ContentFile import ContentFile
 from github.PullRequest import PullRequest
 from github.Repository import Repository
 import pandas as pd
@@ -8,7 +10,26 @@ from github import Github, GithubException
 from tqdm import tqdm
 from datetime import datetime
 
-from dataset import Dataset, DatasetEntry, FileData, Metadata
+from dataset import (
+    Comment,
+    Dataset,
+    Dataset_new,
+    DatasetEntry,
+    DatasetEntry_new,
+    FileData,
+    FileData_new,
+    Metadata,
+    Metadata_new,
+)
+from errors import (
+    CantCheckoutCommitError,
+    CantEnsureFullHistoryError,
+    CantFetchPRError,
+    MultipleFilesError,
+    NoDiffsAfterError,
+    NoDiffsBeforeError,
+    SetupException,
+)
 from handlers import HandlerException, get_build_handler
 from utils import has_only_1_comment, move_github_logging_to_file, clone, run_git_cmd
 
@@ -54,115 +75,216 @@ def reset_repo_to_latest_commit(repo_path: str) -> None:
     run_git_cmd(["reset", "--hard", current_branch], repo_path)
 
 
+def get_diffs_before(repo: Repository, pr: PullRequest) -> dict[str, str]:
+    comments = list(pr.get_review_comments())
+    comments.sort(key=lambda comment: comment.created_at)
+    first_comment = comments[0]
+    try:
+        return {
+            file.filename: file.patch
+            for file in repo.compare(pr.base.sha, first_comment.commit_id).files
+        }
+    except GithubException as e:
+        raise NoDiffsBeforeError(e)
+
+
+def get_diffs_after(repo: Repository, pr: PullRequest) -> dict[str, str]:
+    comments = list(pr.get_review_comments())
+    comments.sort(key=lambda comment: comment.created_at)
+    first_commit_after_comment = None
+    commits = list(pr.get_commits())
+    commits.sort(key=lambda commit: commit.commit.author.date)
+    for commit in commits:
+        if commit.commit.author.date > comments[0].created_at:
+            first_commit_after_comment = commit
+            break
+
+    assert first_commit_after_comment is not None, "No commit after the comment"
+
+    try:
+        return {
+            file.filename: file.patch
+            for file in repo.compare(first_commit_after_comment.sha, pr.base.sha).files
+        }
+    except GithubException as e:
+        raise NoDiffsAfterError(e)
+
+
+def checkout(repo_path: str, pr: PullRequest) -> None:
+    try:
+        ensure_full_history(repo_path)
+    except subprocess.CalledProcessError as e:
+        raise CantEnsureFullHistoryError(e.stderr)
+
+    try:
+        run_git_cmd(["checkout", pr.merge_commit_sha], repo_path)
+    except subprocess.CalledProcessError:
+        try:
+            run_git_cmd(["fetch", "origin", f"pull/{pr.number}/merge"], repo_path)
+        except subprocess.CalledProcessError as e:
+            raise CantFetchPRError(e.stderr)
+
+        try:
+            run_git_cmd(["checkout", pr.merge_commit_sha], repo_path)
+        except subprocess.CalledProcessError as e:
+            raise CantCheckoutCommitError(e.stderr)
+
+
+def try_read_file(fname: str) -> str:
+    if not os.path.exists(fname):
+        return ""  # file was removed after the PR
+    try:
+        with open(fname, "r", encoding="utf-8") as f:
+            return f.read()
+    except UnicodeDecodeError:
+        return "Binary file (from filesystem), to be ignored"
+    except IsADirectoryError:
+        return "File listed in PR is a directory (likely a submodule), to be ignored"
+
+
+def get_files(pr: PullRequest, repo: Repository, repo_path: str) -> dict[str, FileData_new]:
+    ret = {}
+    for file in pr.get_files():
+        try:
+            contents = repo.get_contents(file.filename, ref=pr.base.sha)
+            assert isinstance(
+                contents, ContentFile
+            ), f"Multiple files with the same name {file.filename} in base sha {pr.base.sha} ({contents})"
+            contents_before = contents.decoded_content.decode()
+        except AssertionError as e:
+            raise MultipleFilesError(e)
+        except UnicodeError as e:
+            contents_before = "Binary content (from API), to be ignored"
+        except Exception as e:
+            contents_before = ""  # file didn't exist before the PR
+
+        try:
+            contents = repo.get_contents(file.filename, ref=pr.merge_commit_sha)
+            assert isinstance(
+                contents, ContentFile
+            ), f"Multiple files with the same name {file.filename} in merge commit sha {pr.base.sha} ({contents})"
+            contents_after = contents.decoded_content.decode()
+        except AssertionError as e:
+            raise MultipleFilesError(e)
+        except UnicodeError as e:
+            contents_after = "Binary content (from API), to be ignored"
+        except Exception as e:
+            checkout(repo_path, pr)
+            contents_after = try_read_file(os.path.join(repo_path, file.filename))
+
+        ret[file.filename] = FileData_new(
+            is_code_related=file.filename.endswith('.java'),
+            coverage={},
+            content_before_pr=contents_before,
+            content_after_pr=contents_after,
+        )
+
+    return ret
+
+
+def get_comments(pr: PullRequest) -> list[Comment]:
+    ret = []
+    for comment in pr.get_review_comments():
+        comment = Comment(
+            body=comment.body,
+            file=comment.path,
+            from_=comment.start_line if comment.start_line else comment.line,
+            to=comment.line,
+        )
+        if comment.from_ is None or comment.to is None:
+            comment.to = comment.original_line
+            comment.from_ = comment.original_start_line
+        ret.append(comment)
+    return ret
+
+
 def process_pull(
     repo: Repository,
     pr: PullRequest,
-    dataset: Dataset,
+    dataset: Dataset_new,
     repos_dir: str,
-    cache: dict[str, dict[int, DatasetEntry]] = {},
+    cache: dict[str, dict[int, DatasetEntry_new]] = {},
 ):
     if pr.number in cache.get(repo.full_name, set()):
         dataset.entries.append(cache[repo.full_name][pr.number])
         return
 
-    commits = list(pr.get_commits())
-    if not commits:
-        return  # No commits, skip processing
-
-    first_commit = commits[0]
-    last_commit = commits[-1]
-
-    try:
-        diffs_before = {
-            file.filename: file.patch for file in repo.compare(pr.base.sha, first_commit.sha).files
-        }
-    except GithubException as e:
-        return
+    entry = DatasetEntry_new(
+        metadata=Metadata_new(
+            repo.full_name,
+            pr.number,
+            pr.title,
+            pr.body,
+            pr.merge_commit_sha,
+            reason_for_failure="Was still being processed",
+        ),
+        files={},
+        diffs_before={},
+        comments=[],
+        diffs_after={},
+    )
+    dataset.entries.append(entry)
 
-    comments = list(pr.get_review_comments())
-    assert len(comments) == 1
-    comment = comments[0]
-    comment_text = comment.body
-    commented_file_path = comment.path
-
-    try:
-        diffs_after = {
-            file.filename: file.patch
-            for file in repo.compare(first_commit.sha, last_commit.sha).files
-        }
-    except GithubException as e:
-        return
-
-    entry = DatasetEntry(
-        metadata=Metadata(
-            repo.full_name,
-            pr.number,
-            pr.merge_commit_sha,
-            {comment_text: commented_file_path},
-            reason_for_failure="Was still being processed",
-        ),
-        files={file.filename: FileData(file.filename) for file in pr.get_files()},
-        diffs_before=diffs_before,
-        comments=[comment_text],
-        diffs_after=diffs_after,
-    )
-    dataset.entries.append(entry)
 
     repo_path = os.path.join(repos_dir, repo.full_name)
 
-    updates = {}
-    if not clone(repo.full_name, repos_dir, updates):
-        entry.metadata.last_cmd_error_msg = updates["error_msg"]
-        entry.metadata.reason_for_failure = "Couldn't clone the repo successfully"
-        entry.metadata.successful = False
-    build_handler = None
-
-    def _try_cmd(action: Callable[[], Any], reason_for_failure: str) -> bool:
-        """
-        Tries a command, and if it fails, it sets the metadata of the entry.
-        """
-        try:
-            # return action()
-            action()
-        except subprocess.CalledProcessError as e:
-            entry.metadata.last_cmd_error_msg = f"{e.stderr}"
-            entry.metadata.reason_for_failure = reason_for_failure
-            entry.metadata.successful = False
-            # raise e
-        return entry.metadata.successful
+    setup_steps = [
+        (
+            "Getting diffs before the first commit...",
+            lambda: entry.diffs_before.update(get_diffs_before(repo, pr)),
+        ),
+        (
+            "Getting diffs after the first commit...",
+            lambda: entry.diffs_after.update(get_diffs_after(repo, pr)),
+        ),
+        ("Cloning the repo...", lambda: clone(repo.full_name, repos_dir)),
+        (
+            "Getting the files...",
+            lambda: entry.files.update(get_files(pr, repo, repo_path)),
+        ),
+        (
+            "Getting the comments...",
+            lambda: entry.comments.extend(get_comments(pr)),
+        ),
+        ("Checkout out merge commit...", lambda: checkout(repo_path, pr)),
+    ]
 
     if not _try_cmd(
         lambda: ensure_full_history(repo_path),
         "Couldn't ensure the full history of the repo (fetch --unshallow)",
     ):
         return
+    with tqdm(total=len(setup_steps), desc="Setting up PR", leave=False) as pbar:
+        for message, action in setup_steps:
+            pbar.set_postfix(
+                {
+                    "doing": message,
+                    "started at": datetime.now().strftime("%d/%m, %H:%M:%S"),
+                }
+            )
+            try:
+                action()
+            except SetupException as e:
+                entry.metadata.last_cmd_error_msg = str(e)
+                entry.metadata.reason_for_failure = e.reason_for_failure
+                entry.metadata.successful = False
+                return
+            pbar.update(1)
 
     try:
-        run_git_cmd(["checkout", pr.merge_commit_sha], repo_path)
-    except subprocess.CalledProcessError:
-        if not _try_cmd(
-            lambda: run_git_cmd(["fetch", "origin", f"pull/{pr.number}/merge"], repo_path),
-            "Couldn't fetch the PR's merge commit",
-        ):
-            return
-
-        if not _try_cmd(
-            lambda: run_git_cmd(["checkout", pr.merge_commit_sha], repo_path),
-            "Coudln't checkout the PR's merge commit (even after fetching the pull/<pr_number>/merge)",
-        ):
-            return
-
-    build_handler = get_build_handler(repos_dir, repo.full_name, updates)
-    if build_handler is None:
-        entry.metadata.last_cmd_error_msg = updates["error_msg"]
-        entry.metadata.reason_for_failure = "Couldn't get the build handler"
+        build_handler = get_build_handler(repos_dir, repo.full_name)
+        entry.metadata.build_system = build_handler.get_type()
+        build_handler.set_client(docker_client)
+    except SetupException as e:
+        entry.metadata.last_cmd_error_msg = str(e)
+        entry.metadata.reason_for_failure = e.reason_for_failure
         entry.metadata.successful = False
         return
-    entry.metadata.build_system = build_handler.get_type()
-    build_handler.set_client(docker_client)
 
     def _check_coverages():
         for coverage_file, coverage in build_handler.check_coverage(commented_file_path):
             entry.metadata.commented_files_coverages[commented_file_path][coverage_file] = coverage
+            entry.files[commented_file_path].coverage[coverage_file] = coverage
 
     steps = [
         ("Checking for tests...", build_handler.check_for_tests),
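process_pull now drives the whole setup as a list of (message, thunk) pairs executed under a tqdm bar, converting any SetupException into failure metadata on the entry. A reduced, self-contained sketch of that control flow (run_steps and record_failure are stand-ins, not functions from the repo; only the tqdm usage and the SetupException contract mirror the code above):

    from tqdm import tqdm
    from errors import SetupException

    def run_steps(steps, record_failure) -> bool:
        # steps: list of (message, zero-argument callable); each callable raises a
        # SetupException subclass on failure instead of returning an error flag.
        with tqdm(total=len(steps), desc="Setting up PR", leave=False) as pbar:
            for message, action in steps:
                pbar.set_postfix({"doing": message})
                try:
                    action()
                except SetupException as e:
                    record_failure(e.reason_for_failure, str(e))
                    return False
                pbar.update(1)
        return True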
@@ -197,9 +319,9 @@ def process_pull(
 
 def process_repo(
     repo_name: str,
-    dataset: Dataset,
+    dataset: Dataset_new,
     repos_dir: str,
-    cache: dict[str, dict[int, DatasetEntry]] = {},
+    cache: dict[str, dict[int, DatasetEntry_new]] = {},
 ):
     repo = g.get_repo(repo_name)
     if repo.full_name in cache:
@@ -224,9 +346,9 @@ def process_repo(
 
 def process_repos(
     df: pd.DataFrame,
-    dataset: Dataset,
+    dataset: Dataset_new,
     repos_dir: str,
-    cache: dict[str, dict[int, DatasetEntry]] = {},
+    cache: dict[str, dict[int, DatasetEntry_new]] = {},
 ):
     """
     Processes the repos in the given csv file, extracting the good ones and
@@ -254,9 +376,9 @@ def process_repos(
 
 
 def only_inject_jacoco(
-    dataset: Dataset,
+    dataset: Dataset_new,
    repos_dir: str,
-    cache: dict[str, dict[int, DatasetEntry]] = {},
+    cache: dict[str, dict[int, DatasetEntry_new]] = {},
 ):
     n_successfull_injections = 0
     n_tried_injections = 0
@@ -344,13 +466,13 @@ if __name__ == "__main__":
     if args.only_repo is not None:
         df = df.loc[df["name"] == args.only_repo]
 
-    cache: dict[str, dict[int, DatasetEntry]] = defaultdict(dict)
+    cache: dict[str, dict[int, DatasetEntry_new]] = defaultdict(dict)
     if args.cache is not None:
-        cache_dataset = Dataset.from_json(args.cache)
+        cache_dataset = Dataset_new.from_json(args.cache)
         for cache_entry in cache_dataset.entries:
             cache[cache_entry.metadata.repo][cache_entry.metadata.pr_number] = cache_entry
 
-    dataset = Dataset()
+    dataset = Dataset_new()
     try:
         if args.only_inject_jacoco:
             only_inject_jacoco(dataset, args.repos, cache)