added the dataset and the handlers from the dataset_builder and started modifying them for the webapp
src/utils/dataset.py (new file, 201 lines)
@@ -0,0 +1,201 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Union
import json, uuid


# fmt: off
@dataclass
class FileData:
    is_code_related: bool
    coverage: Dict[str, float]  # jacoco-report -> coverage
    content_before_pr: str = ""
    content_after_pr: str = ""


@dataclass
class Comment:
    body: str
    file: str
    from_: int
    to: int
    paraphrases: List[str] = field(default_factory=list)


@dataclass
class Selection:
    comment_suggests_change: bool
    diff_after_address_change: Optional[bool]
    is_code_related: bool


class ArchiveState(Enum):
    BASE = "base"
    MERGED = "merged"


@dataclass
class Metadata:
    id: str
    repo: str  # the name of the repo, in the form XXX/YYY
    pr_number: int
    pr_title: str
    pr_body: str
    merge_commit_sha: str  # to checkout for the tests
    successful: bool = True
    build_system: str = ""
    reason_for_failure: str = ""
    last_cmd_error_msg: str = ""
    selection: Optional[Selection] = None

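    # e.g. repo="apache/commons-lang", pr_number=42 and ArchiveState.MERGED yield
    # "apache_commons-lang_42_merged.tar.gz" (or "<id>_merged.tar.gz" with only_id=True)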
    def archive_name(self, state: ArchiveState, only_id: bool = False):
        if only_id:
            return f"{self.id}_{state.value}.tar.gz"
        return f"{self.repo.replace('/', '_')}_{self.pr_number}_{state.value}.tar.gz"


@dataclass
class DatasetEntry:
    metadata: Metadata
    files: Dict[str, FileData]  # filename -> file data, files before the PR (before the first PR commits)
    diffs_before: Dict[str, str]  # filename -> diff, diffs between the opening of the PR and the comment
    comments: List[Comment]
    diffs_after: Dict[str, str]  # filename -> diff, changes after the comment


@dataclass
class CommentGenEntry:
    id: str
    files: Dict[str, str]  # filename -> file content
    diffs: Dict[str, str]  # filename -> diff, diffs between the opening of the PR and the comment

    @staticmethod
    def from_entry(entry: DatasetEntry) -> "CommentGenEntry":
        return CommentGenEntry(
            id=entry.metadata.id,
            files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()},
            diffs=entry.diffs_before,
        )


@dataclass
class CodeRefinementEntry:
    id: str
    files: Dict[str, str]  # filename -> file content
    diffs: Dict[str, str]  # filename -> diff, diffs between the opening of the PR and the comment
    comments: List[Comment]

    @staticmethod
    def from_entry(entry: DatasetEntry) -> "CodeRefinementEntry":
        return CodeRefinementEntry(
            id=entry.metadata.id,
            files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()},
            diffs=entry.diffs_before,
            comments=entry.comments,
        )


class OutputType(Enum):
    FULL = "full"
    CODE_REFINEMENT = "code_refinement"
    COMMENT_GEN = "comment_gen"


# fmt: on
@dataclass
class Dataset:
    entries: List[DatasetEntry] = field(default_factory=list)

    def __len__(self) -> int:
        return sum(1 for entry in self.entries if entry.metadata.successful)

    def to_json(
        self,
        filename: str,
        type_: OutputType = OutputType.FULL,
        remove_non_suggesting: bool = False,
    ) -> None:
        """Serialize the dataset to a JSON file, filtering entries according to `type_`"""

        entries_to_dump = self.entries

        if type_ == OutputType.COMMENT_GEN:
            entries_to_dump = [
                entry
                for entry in self.entries
                if entry.metadata.selection and entry.metadata.selection.comment_suggests_change
            ]
        elif type_ == OutputType.CODE_REFINEMENT:
            entries_to_dump = [
                entry
                for entry in self.entries
                if entry.metadata.selection
                and entry.metadata.selection.diff_after_address_change
                and entry.metadata.selection.is_code_related
            ]
        elif type_ == OutputType.FULL and remove_non_suggesting:
            entries_to_dump = [
                entry
                for entry in self.entries
                if entry.metadata.selection and entry.metadata.selection.comment_suggests_change
            ]

        to_dump = Dataset(entries=entries_to_dump)
        # print(f"{len(entries_to_dump)} entries...", end=" ", flush=True)

        def transform_entry(entry: Union[DatasetEntry, Dataset, Any]) -> Union[dict, list]:
            if not isinstance(entry, (DatasetEntry, Dataset)):
                return entry.__dict__

            if type_ == OutputType.FULL:
                return entry.__dict__

            if isinstance(entry, Dataset):
                return entry.entries

            if type_ == OutputType.COMMENT_GEN:
                return CommentGenEntry.from_entry(entry).__dict__

            if type_ == OutputType.CODE_REFINEMENT:
                return CodeRefinementEntry.from_entry(entry).__dict__

        with open(filename, "w", encoding="utf-8") as f:
            json.dump(to_dump, f, default=transform_entry, indent=4)

    @staticmethod
    def from_json(filename: str, keep_still_in_progress: bool = False) -> "Dataset":
        with open(filename, "r", encoding="utf-8") as f:
            print(f"Loading dataset from {filename}...", end=" ", flush=True)
            data = json.load(f)
            print("Done")

        entries = []
        for entry_data in data["entries"]:
            metadata_data = entry_data["metadata"]
            selection_data = metadata_data["selection"] if "selection" in metadata_data else None
            selection = Selection(**selection_data) if selection_data else None
            metadata_data["selection"] = selection
            if "id" not in metadata_data:
                metadata_data["id"] = uuid.uuid4().hex
            metadata = Metadata(**metadata_data)

            if (
                not keep_still_in_progress
                and metadata.reason_for_failure == "Was still being processed"
            ):
                continue

            files = {fname: FileData(**fdata) for fname, fdata in entry_data["files"].items()}

            comments = [Comment(**comment) for comment in entry_data["comments"]]

            entry = DatasetEntry(
                metadata=metadata,
                files=files,
                diffs_before=entry_data["diffs_before"],
                comments=comments,
                diffs_after=entry_data["diffs_after"],
            )
            entries.append(entry)

        return Dataset(entries=entries)

    def build_reference_map(self) -> Dict[str, DatasetEntry]:
        """Build a map from entry id to its DatasetEntry"""

        ref_map = {}
        for entry in self.entries:
            ref_map[entry.metadata.id] = entry
        return ref_map
src/utils/handlers.py (new file, 622 lines)
@@ -0,0 +1,622 @@
from abc import ABC, abstractmethod
import os, re, docker, signal, javalang
from bs4 import BeautifulSoup
from typing import Iterable, Optional, Tuple, Iterator
import xml.etree.ElementTree as ET
from javalang.tree import PackageDeclaration
import tarfile
import tempfile
from shutil import rmtree

REPORT_SIZE_THRESHOLD = 400  # reports smaller than 400 bytes (characters) are ignored


USER_ID = os.getuid()  # for container user
GROUP_ID = os.getgid()


class BuildHandler(ABC):
    def __init__(self, repo_path: str, build_file: str, updates: dict) -> None:
        super().__init__()
        self.path: str = repo_path
        self.build_file: str = build_file
        self.updates = updates

    def set_client(self, client: docker.DockerClient):
        self.client = client

    def __enter__(self):
        self.container = self.client.containers.run(
            image=self.container_name(),
            command="tail -f /dev/null",  # to keep the container alive
            volumes={os.path.abspath(self.path): {"bind": "/repo", "mode": "rw"}},
            user=f"{USER_ID}:{GROUP_ID}",
            detach=True,
            tty=True,
        )

    def __exit__(self, *args):
        self.container.kill()
        self.container.remove()
        rmtree(self.path)

    def compile_repo(self) -> None:
        def timeout_handler(signum, frame):
            raise TimeoutError("Compilation exceeded time limit")

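        # NOTE: SIGALRM-based alarms are Unix-only and must be set from the main thread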
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(3600)  # Set timeout to 1 hour (3600 seconds)

        try:
            exec_result = self.container.exec_run(self.compile_cmd())
            output = clean_output(exec_result.output)
            if exec_result.exit_code != 0:
                raise FailedToCompileError(output)
        except TimeoutError:
            self.updates["compiled_successfully"] = False
            self.updates[
                "error_msg"
            ] = "Compile process killed due to exceeding the 1-hour time limit"
        finally:
            signal.alarm(0)  # Cancel the alarm

    def test_repo(self) -> None:
        def timeout_handler(signum, frame):
            raise TimeoutError("Tests exceeded time limit")

        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(3600)  # Set timeout to 1 hour (3600 seconds)

        try:
            exec_result = self.container.exec_run(self.test_cmd())
            output = clean_output(exec_result.output)
            if exec_result.exit_code != 0:
                raise FailedToTestError(output)

            self.extract_test_numbers(output)

        except TimeoutError:
            self.updates["tested_successfully"] = False
            self.updates["error_msg"] = "Test process killed due to exceeding the 1-hour time limit"
            return

        finally:
            signal.alarm(0)  # Cancel the alarm

    def generate_coverage_report(self, already_injected_manually: bool = False) -> None:
        result = self.container.exec_run(self.generate_coverage_report_cmd())
        if result.exit_code != 0:
            if already_injected_manually:
                raise CantExecJacoco(clean_output(result.output))

            build_file_path = os.path.join(self.path, self.build_file)
            if not os.path.exists(build_file_path):
                raise CantInjectJacoco(f"'{self.build_file}' not found")
            with open(build_file_path, "r") as f:
                og_content = f.read()
            try:
                self._try_to_inject_jacoco(build_file_path)
                self.generate_coverage_report(already_injected_manually=True)
            except (CantInjectJacoco, CantExecJacoco) as e:
                with open(build_file_path, "w") as f:
                    f.write(og_content)
                raise e

    @abstractmethod
    def _try_to_inject_jacoco(self, build_file_path: str) -> None:
        pass

    def check_coverage(self, filename: str) -> Iterator[Tuple[str, float]]:
        """
        Yield (report path, coverage) for every JaCoCo report in which `filename` is covered.
        """
        found_at_least_one = False
        candidates = []
        for coverage_report_path in self.get_jacoco_report_paths():
            if not os.path.exists(coverage_report_path):
                raise NoCoverageReportFound(
                    f"Coverage report file '{coverage_report_path}' does not exist"
                )

            fully_qualified_class = self._extract_fully_qualified_class(filename)
            candidates.append({"report_file": coverage_report_path, "fqc": fully_qualified_class})
            # if coverage_report_path[:len(src_dir)] != src_dir:
            #     continue
            coverage = get_coverage_for_file(
                coverage_report_path, fully_qualified_class, os.path.basename(filename)
            )
            if coverage != -1:
                found_at_least_one = True
                yield coverage_report_path, coverage

        if not found_at_least_one:
            raise FileNotCovered(
                f"File '{filename}' didn't have any coverage in any of the jacoco reports: {candidates}"
            )

    def _extract_fully_qualified_class(self, filepath: str) -> str:
        if not filepath.endswith('.java'):
            raise NotJavaFileError(f"File '{filepath}' does not end with .java")

        if not os.path.exists(os.path.join(self.path, filepath)):
            raise FileNotFoundInRepoError(f"File '{filepath}' not found in repo")

        with open(os.path.join(self.path, filepath)) as f:
            try:
                parsed_tree = javalang.parse.parse(f.read())
            except javalang.parser.JavaSyntaxError as e:
                raise NotJavaFileError(
                    f"File '{filepath}' has a syntax error and could not be parsed by javalang, raised error: '{e}'"
                )

        package_name = None
        for _, node in parsed_tree.filter(PackageDeclaration):
            package_name = node.name  # type: ignore
            break  # Stop after finding the first package declaration

        if package_name is None:
            raise NoPackageFoundError(
                f"File '{filepath}' did not have a package name recognized by javalang"
            )

        fully_qualified_class = package_name.replace('.', '/')
        # src_dir = filepath[:filepath.index(fully_qualified_class)]
        fully_qualified_class += "/" + os.path.basename(filepath)[:-5]  # -5 to remove '.java'
        return fully_qualified_class

    def clean_repo(self) -> None:
        self.container.exec_run(self.clean_cmd())

    @abstractmethod
    def get_type(self) -> str:
        pass

    @abstractmethod
    def compile_cmd(self) -> str:
        pass

    @abstractmethod
    def test_cmd(self) -> str:
        pass

    @abstractmethod
    def extract_test_numbers(self, output: str) -> None:
        pass

    @abstractmethod
    def clean_cmd(self) -> str:
        pass

    @abstractmethod
    def generate_coverage_report_cmd(self) -> str:
        pass

    @abstractmethod
    def get_jacoco_report_paths(self) -> Iterable[str]:
        pass

    @abstractmethod
    def container_name(self) -> str:
        pass


class MavenHandler(BuildHandler):
    def __init__(self, repo_path: str, build_file: str, updates: Optional[dict] = None) -> None:
        super().__init__(repo_path, build_file, updates if updates is not None else {})
        self.base_cmd = "mvn -B -Dstyle.color=never -Dartifact.download.skip=true"
        # -B (Batch Mode): Runs Maven in non-interactive mode, reducing output and removing download progress bars.
        # -Dstyle.color=never: Disables ANSI colors.
        # -Dartifact.download.skip=true: Prevents Maven from printing download logs (but still downloads dependencies when needed).

    def get_type(self) -> str:
        return "maven"

    def compile_cmd(self) -> str:
        return f"{self.base_cmd} clean compile"

    def test_cmd(self) -> str:
        return f"{self.base_cmd} test"

    def clean_cmd(self) -> str:
        return f"{self.base_cmd} clean"

    def generate_coverage_report_cmd(self) -> str:
        return f"{self.base_cmd} jacoco:report-aggregate"

    def container_name(self) -> str:
        return "crab-maven"

    def extract_test_numbers(self, output: str) -> None:
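        # Matches the surefire summary block, which looks like:
        #   [INFO] Results:
        #   [INFO]
        #   [INFO] Tests run: 42, Failures: 1, Errors: 0, Skipped: 2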
        pattern = r"\[INFO\] Results:\n\[INFO\]\s*\n\[INFO\] Tests run: (\d+), Failures: (\d+), Errors: (\d+), Skipped: (\d+)"

        matches = re.findall(pattern, output)

        self.updates["n_tests"] = 0
        self.updates["n_tests_passed"] = 0  # Passed tests = Tests run - (Failures + Errors)
        self.updates["n_tests_failed"] = 0
        self.updates["n_tests_errors"] = 0
        self.updates["n_tests_skipped"] = 0

        if len(matches) == 0:
            raise NoTestResultsToExtractError("No test results found in Maven output:\n" + output)

        for match in matches:
            tests_run, failures, errors, skipped = map(int, match)
            self.updates["n_tests"] += tests_run
            self.updates["n_tests_failed"] += failures
            self.updates["n_tests_errors"] += errors
            self.updates["n_tests_skipped"] += skipped
            self.updates["n_tests_passed"] += tests_run - (
                failures + errors
            )  # Calculate passed tests

    def get_jacoco_report_paths(self) -> Iterable[str]:
        found_at_least_one = False
        for root, _, files in os.walk(self.path):
            if "target/site" not in root:
                continue  # to avoid any misleading jacoco.xml randomly lying around
            for file in files:
                if file == "jacoco.xml":
                    found_at_least_one = True
                    yield os.path.join(root, file)
        if not found_at_least_one:
            raise NoCoverageReportFound(f"Couldn't find any 'jacoco.xml' in {self.path}")

    def _try_to_inject_jacoco(self, build_file_path: str) -> None:
        with open(build_file_path, "r", encoding="utf-8") as f:
            content = f.read()

        if "<artifactId>jacoco-maven-plugin</artifactId>" in content:
            return  # already present

        jacoco_plugin = """
        <plugin>
            <groupId>org.jacoco</groupId>
            <artifactId>jacoco-maven-plugin</artifactId>
            <version>0.8.8</version>
            <executions>
                <execution>
                    <goals>
                        <goal>prepare-agent</goal>
                    </goals>
                </execution>
                <execution>
                    <id>report</id>
                    <phase>test</phase>
                    <goals>
                        <goal>report</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
        """

        if "<plugins>" in content:
            # just insert inside existing plugins
            content = content.replace("<plugins>", f"<plugins>\n{jacoco_plugin}")
        elif "</project>" in content:
            # plugins section doesn't exist, create full <build> section
            build_block = f"""
        <build>
            <plugins>
                {jacoco_plugin}
            </plugins>
        </build>
        """
            content = content.replace("</project>", f"{build_block}\n</project>")
        else:
            raise CantInjectJacoco("Could not find insertion point for plugins in pom.xml")

        with open(build_file_path, "w", encoding="utf-8") as f:
            f.write(content)


class GradleHandler(BuildHandler):
    def __init__(self, repo_path: str, build_file: str, updates: Optional[dict] = None) -> None:
        super().__init__(repo_path, build_file, updates if updates is not None else {})
        self.base_cmd = "gradle --no-daemon --console=plain"

    def get_type(self) -> str:
        return "gradle"

    def compile_cmd(self) -> str:
        return f"{self.base_cmd} compileJava"

    def test_cmd(self) -> str:
        return f"{self.base_cmd} test"

    def clean_cmd(self) -> str:
        return f"{self.base_cmd} clean"

    def generate_coverage_report_cmd(self) -> str:
        return f"{self.base_cmd} jacocoTestReport"

    def container_name(self) -> str:
        return "crab-gradle"

    def extract_test_numbers(self, output: str) -> None:
        self.updates["n_tests"] = -1
        self.updates["n_tests_passed"] = -1
        self.updates["n_tests_failed"] = -1
        self.updates["n_tests_errors"] = -1
        self.updates["n_tests_skipped"] = -1

        test_results_path = os.path.join(self.path, "build/reports/tests/test/index.html")
        if not os.path.exists(test_results_path):
            raise NoTestResultsToExtractError(
                "No test results found (probably a repo with sub-projects)"
            )

        # Load the HTML report that Gradle writes for the `test` task
        with open(test_results_path, "r") as file:
            soup = BeautifulSoup(file, "html.parser")

        test_div = soup.select_one("div.infoBox#tests")
        if test_div is None:
            raise NoTestResultsToExtractError("No test results found (no div.infoBox#tests)")

        counter_div = test_div.select_one("div.counter")
        if counter_div is None:
            raise NoTestResultsToExtractError(
                "No test results found (no div.counter for tests)"
            )

        self.updates["n_tests"] = int(counter_div.text.strip())

        failures_div = soup.select_one("div.infoBox#failures")
        if failures_div is None:
            raise NoTestResultsToExtractError("No test results found (no div.infoBox#failures)")

        counter_div = failures_div.select_one("div.counter")
        if counter_div is None:
            raise NoTestResultsToExtractError(
                "No test results found (no div.counter for failures)"
            )

        self.updates["n_tests_failed"] = int(counter_div.text.strip())

        # Calculate passed tests
        self.updates["n_tests_passed"] = (
            self.updates["n_tests"] - self.updates["n_tests_failed"]
        )

    def get_jacoco_report_paths(self) -> Iterable[str]:
        # jacocoTestReport (with xml.required = true, injected below) writes an XML
        # report next to the HTML one; only the XML can be parsed by get_coverage_for_file
        found_at_least_one = False
        for root, _, files in os.walk(self.path):
            if "reports/jacoco" not in root:
                continue
            for file in files:
                if file.endswith(".xml"):
                    found_at_least_one = True
                    yield os.path.join(root, file)
        if not found_at_least_one:
            raise NoCoverageReportFound(
                f"Couldn't find any JaCoCo XML report inside any 'reports/jacoco' in {self.path}"
            )

    def _try_to_inject_jacoco(self, build_file_path: str) -> None:
        with open(build_file_path, "r", encoding="utf-8") as f:
            content = f.read()

        if "id 'jacoco'" in content or "apply plugin: 'jacoco'" in content:
            return  # already present

        jacoco_snippet = """
plugins {
    id 'jacoco'
}

jacoco {
    toolVersion = "0.8.8"
}

test {
    finalizedBy jacocoTestReport
}

jacocoTestReport {
    dependsOn test
    reports {
        xml.required = true
        html.required = true
    }
}"""

        content = jacoco_snippet + "\n\n" + content

        with open(build_file_path, "w", encoding="utf-8") as f:
            f.write(content)


class SetupException(Exception, ABC):
    reason_for_failure: str


class NotValidDirectory(SetupException):
    reason_for_failure = "The directory is not valid"


class CantFindBuildFile(SetupException):
    reason_for_failure = "Couldn't find the build file in the directory"


class HandlerException(Exception, ABC):
    reason_for_failure = "Generic handler exception (this shouldn't appear)"


class NoTestsFoundError(HandlerException):
    reason_for_failure = "No tests found"


class FailedToCompileError(HandlerException):
    reason_for_failure = "Failed to compile"


class FailedToTestError(HandlerException):
    reason_for_failure = "Failed to test"


class NoTestResultsToExtractError(HandlerException):
    reason_for_failure = "Failed to extract test results"


class CantExecJacoco(HandlerException):
    reason_for_failure = "Couldn't execute jacoco"


class CantInjectJacoco(HandlerException):
    reason_for_failure = "Couldn't inject jacoco in the build file"


class NoCoverageReportFound(HandlerException):
    reason_for_failure = "No coverage report was found"


class FileNotCovered(HandlerException):
    reason_for_failure = "Commented file from the PR wasn't covered"


class GradleAggregateReportNotFound(HandlerException):
    reason_for_failure = "Couldn't find the aggregate report (with gradle it's messy)"


class NotJavaFileError(HandlerException):
    reason_for_failure = "File that was checked for coverage was not a java file"


class NoPackageFoundError(HandlerException):
    reason_for_failure = "Java file did not contain a valid package name"


class FileNotFoundInRepoError(HandlerException):
    reason_for_failure = "Commented file not found in repo (likely renamed or deleted)"


def merge_download_lines(lines: list) -> list:
    """
    Merges lines that are part of the same download block in Maven output.

    Args:
        lines (list): The lines to merge.

    Returns:
        list: The merged lines.
    """
    downloading_block = False
    cleaned_lines = []
    for line in lines:
        if re.match(r"\[INFO\] Download(ing|ed) from", line):
            if not downloading_block:
                cleaned_lines.append("[CRAB] Downloading stuff")
                downloading_block = True
        else:
            cleaned_lines.append(line)
            downloading_block = False
    return cleaned_lines


def merge_unapproved_licences(lines: list) -> list:
    """
    Merges lines that are part of the same unapproved licences block in Maven output.

    Args:
        lines (list): The lines to merge.

    Returns:
        list: The merged lines.
    """
    licenses_block = False
    cleaned_lines = []
    for line in lines:
        if re.match(r"\[WARNING\] Files with unapproved licenses:", line):
            cleaned_lines.append(line)
            cleaned_lines.append("[CRAB] List of all the unapproved licenses...")
            licenses_block = True
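        # while the block lasts, each flagged file is listed on its own line
        # (presumably of the form "  ?/.m2/repository/..."); the first line that
        # doesn't match ends the block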
        elif licenses_block and not re.match(r"\s+\?\/\.m2\/repository", line):
            licenses_block = False

        if not licenses_block:
            cleaned_lines.append(line)
    return cleaned_lines


def clean_output(output: bytes) -> str:
    output_lines = output.decode().split("\n")

    cleaned_lines = merge_download_lines(output_lines)
    cleaned_lines = merge_unapproved_licences(cleaned_lines)

    return "\n".join(cleaned_lines)


def get_coverage_for_file(xml_file: str, target_fully_qualified_class: str, basename: str) -> float:
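    # A JaCoCo XML report nests classes under packages, e.g.:
    #   <package name="com/example">
    #     <class name="com/example/Foo" sourcefilename="Foo.java">
    #       <counter type="LINE" missed="3" covered="7"/>
    #     </class>
    #   </package>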
    # Parse the XML file
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Find coverage for the target file
    for package in root.findall(".//package"):
        for class_ in package.findall("class"):
            if (
                class_.get("sourcefilename") == basename
                and class_.get("name") == target_fully_qualified_class
            ):
                # Extract line coverage data
                line_counter = class_.find("counter[@type='LINE']")
                if line_counter is not None:
                    counter = line_counter.get("missed")
                    assert isinstance(counter, str)
                    missed = int(counter)
                    counter = line_counter.get("covered")
                    assert isinstance(counter, str)
                    covered = int(counter)
                    total = missed + covered
                    coverage = (covered / total) * 100 if total > 0 else 0
                    return coverage
    return -1


def get_build_handler(root: str, repo: str, verbose: bool = False) -> BuildHandler:
    """
    Get a BuildHandler for a repository, where `repo` may be either:

    - a directory under `root`, or
    - a .tar.gz/.tgz file in `root` containing your repo

    Returns:
        an instance of GradleHandler or MavenHandler
    """
    path = os.path.join(root, repo)

    # 1) If it's a tarball, extract it into a temp dir and work from there
    if os.path.isfile(path) and tarfile.is_tarfile(path):
        if verbose:
            print(f"Archive detected: extracting {path}…")
        archive_path = path
        path = tempfile.mkdtemp(prefix="repo_")
        with tarfile.open(archive_path, "r:gz") as tar:
            tar.extractall(path)

    # 2) Otherwise it must be a directory
    elif not os.path.isdir(path):
        raise NotValidDirectory(f"The path {path!r} is neither a directory nor a tar archive.")

    # 3) Now scan for build files
    to_keep = {"pom.xml", "build.gradle"}
    for entry in os.scandir(path):
        if entry.is_file() and entry.name in to_keep:
            if verbose:
                print(f"Found {entry.name!r} in {path!r}, returning handler")

            if entry.name == "build.gradle":
                return GradleHandler(path, entry.name)
            else:
                return MavenHandler(path, entry.name)

    if os.path.exists(path) and os.path.isdir(path):
        rmtree(path)

    raise CantFindBuildFile(f"Could not find any of {sorted(to_keep)} in {path!r}")
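
A minimal usage sketch (assumes a running local Docker daemon and pre-built "crab-maven"/"crab-gradle" images; the archive name, file path, and `utils.handlers` import path are hypothetical):

    import docker
    from utils.handlers import get_build_handler

    handler = get_build_handler("/tmp/archives", "apache_commons-lang_42_merged.tar.gz", verbose=True)
    handler.set_client(docker.from_env())
    with handler:  # starts the container; on exit it is killed and the extracted repo removed
        handler.compile_repo()
        handler.test_repo()
        handler.generate_coverage_report()
        for report, coverage in handler.check_coverage("src/main/java/org/example/Foo.java"):
            print(f"{report}: {coverage:.1f}% line coverage")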