using exceptions to interrupt the flow of

execution instead of having to propagate a boolean
This commit is contained in:
Karma Riuk
2025-03-14 16:10:54 +01:00
parent b04d5b410a
commit 082f6c6f71
2 changed files with 67 additions and 63 deletions

View File

@ -4,7 +4,7 @@ from tqdm import tqdm
import shutil import shutil
from datetime import datetime from datetime import datetime
from handlers import get_build_handler from handlers import FailedToCompileError, FailedToTestError, NoTestsFoundError, NoTestResultsToExtractError, get_build_handler
from utils import clone from utils import clone
tqdm.pandas() tqdm.pandas()
@ -58,31 +58,48 @@ def process_row(repo, client, dest: str, updates: dict, force: bool = False, ver
build_handler.set_client(client) build_handler.set_client(client)
with build_handler: with build_handler:
pbar.set_postfix_str("Checking for tests...") try:
if not build_handler.has_tests(): pbar.set_postfix_str("Checking for tests...")
if verbose: print(f"Removing {repo}, no test suites") build_handler.check_for_tests()
pbar.update(1)
pbar.set_postfix_str("Compiling...")
build_handler.compile_repo()
updates["compiled_successfully"] = True
pbar.update(1)
pbar.set_postfix_str("Running tests...")
build_handler.test_repo()
updates["tested_successfully"] = True
pbar.update(1)
build_handler.clean_repo()
# If repo was not removed, then it is a good repo
updates["good_repo_for_crab"] = True
except NoTestsFoundError as e:
updates["error_msg"] = str(e)
if verbose: print(f"Removing {repo}, error: no tests found")
remove_dir(repo_path) remove_dir(repo_path)
return return
if verbose: print(f"Keeping {repo}") except FailedToCompileError as e:
pbar.update(1) updates["error_msg"] = str(e)
updates["compiled_successfully"] = False
pbar.set_postfix_str("Compiling...") if verbose: print(f"Removing {repo}, error: failed to compile")
if not build_handler.compile_repo():
if verbose: print(f"Removing {repo}, failed to compile")
remove_dir(repo_path) remove_dir(repo_path)
return return
pbar.update(1) except FailedToTestError as e:
updates["error_msg"] = str(e)
pbar.set_postfix_str("Running tests...") updates["tested_successfully"] = False
if not build_handler.test_repo(): if verbose: print(f"Removing {repo}, error: failed to run tests")
if verbose: print(f"Removing {repo}, failed to run tests") remove_dir(repo_path)
return
except NoTestResultsToExtractError as e:
updates["error_msg"] = str(e)
if verbose: print(f"Removing {repo}, error: failed to extract test results")
remove_dir(repo_path) remove_dir(repo_path)
return return
build_handler.clean_repo()
pbar.update(1)
# If repo was not removed, then it is a good repo
updates["good_repo_for_crab"] = True
def save_df_with_updates(df, updates_list, results_file: str, verbose=False): def save_df_with_updates(df, updates_list, results_file: str, verbose=False):
# Set the new data # Set the new data

View File

@ -32,19 +32,19 @@ class BuildHandler(ABC):
self.container.remove() self.container.remove()
def has_tests(self) -> bool: def check_for_tests(self) -> None:
with open(os.path.join(self.path, self.build_file), "r") as f: with open(os.path.join(self.path, self.build_file), "r") as f:
content = f.read() content = f.read()
for library in ["junit", "testng", "mockito"]: for library in ["junit", "testng", "mockito"]:
if library in content: if library in content:
self.updates["detected_source_of_tests"] = library + " library in build file" self.updates["detected_source_of_tests"] = library + " library in build file"
return True return
for keyword in ["testImplementation", "functionalTests", "bwc_tests_enabled"]: for keyword in ["testImplementation", "functionalTests", "bwc_tests_enabled"]:
if keyword in content: if keyword in content:
self.updates["detected_source_of_tests"] = keyword + " keyword in build file" self.updates["detected_source_of_tests"] = keyword + " keyword in build file"
return False return
test_dirs = [ test_dirs = [
"src/test/java", "src/test/java",
@ -55,12 +55,10 @@ class BuildHandler(ABC):
for td in test_dirs: for td in test_dirs:
if os.path.exists(os.path.join(self.path, td)): if os.path.exists(os.path.join(self.path, td)):
self.updates["detected_source_of_tests"] = td + " dir exists in repo" self.updates["detected_source_of_tests"] = td + " dir exists in repo"
return True
self.updates["error_msg"] = "No tests found" raise NoTestsFoundError("No tests found")
return False
def compile_repo(self) -> bool: def compile_repo(self) -> None:
def timeout_handler(signum, frame): def timeout_handler(signum, frame):
raise TimeoutError("Tests exceeded time limit") raise TimeoutError("Tests exceeded time limit")
@ -71,21 +69,14 @@ class BuildHandler(ABC):
exec_result = self.container.exec_run(self.compile_cmd()) exec_result = self.container.exec_run(self.compile_cmd())
output = clean_output(exec_result.output) output = clean_output(exec_result.output)
if exec_result.exit_code != 0: if exec_result.exit_code != 0:
self.updates["compiled_successfully"] = False raise FailedToCompileError(output)
self.updates["error_msg"] = output
return False
self.updates["compiled_successfully"] = True
return True
except TimeoutError: except TimeoutError:
self.updates["compiled_successfully"] = False self.updates["compiled_successfully"] = False
self.updates["error_msg"] = "Compile process killed due to exceeding the 1-hour time limit" self.updates["error_msg"] = "Compile process killed due to exceeding the 1-hour time limit"
return False
finally: finally:
signal.alarm(0) # Cancel the alarm signal.alarm(0) # Cancel the alarm
def test_repo(self) -> bool: def test_repo(self) -> None:
def timeout_handler(signum, frame): def timeout_handler(signum, frame):
raise TimeoutError("Tests exceeded time limit") raise TimeoutError("Tests exceeded time limit")
@ -96,19 +87,14 @@ class BuildHandler(ABC):
exec_result = self.container.exec_run(self.test_cmd()) exec_result = self.container.exec_run(self.test_cmd())
output = clean_output(exec_result.output) output = clean_output(exec_result.output)
if exec_result.exit_code != 0: if exec_result.exit_code != 0:
self.updates["tested_successfully"] = False raise FailedToTestError(output)
self.updates["error_msg"] = output
return False
self.updates["tested_successfully"] = True self.extract_test_numbers(output)
self.updates["error_msg"] = output
return self.extract_test_numbers(output)
except TimeoutError: except TimeoutError:
self.updates["tested_successfully"] = False self.updates["tested_successfully"] = False
self.updates["error_msg"] = "Test process killed due to exceeding the 1-hour time limit" self.updates["error_msg"] = "Test process killed due to exceeding the 1-hour time limit"
return False return
finally: finally:
signal.alarm(0) # Cancel the alarm signal.alarm(0) # Cancel the alarm
@ -126,7 +112,7 @@ class BuildHandler(ABC):
pass pass
@abstractmethod @abstractmethod
def extract_test_numbers(self, output: str) -> bool: def extract_test_numbers(self, output: str) -> None:
pass pass
@abstractmethod @abstractmethod
@ -157,7 +143,7 @@ class MavenHandler(BuildHandler):
def container_name(self) -> str: def container_name(self) -> str:
return "crab-maven" return "crab-maven"
def extract_test_numbers(self, output: str) -> bool: def extract_test_numbers(self, output: str) -> None:
pattern = r"\[INFO\] Results:\n\[INFO\]\s*\n\[INFO\] Tests run: (\d+), Failures: (\d+), Errors: (\d+), Skipped: (\d+)" pattern = r"\[INFO\] Results:\n\[INFO\]\s*\n\[INFO\] Tests run: (\d+), Failures: (\d+), Errors: (\d+), Skipped: (\d+)"
matches = re.findall(pattern, output) matches = re.findall(pattern, output)
@ -169,8 +155,7 @@ class MavenHandler(BuildHandler):
self.updates["n_tests_skipped"] = 0 self.updates["n_tests_skipped"] = 0
if len(matches) == 0: if len(matches) == 0:
self.updates["error_msg"] = "No test results found in Maven output:\n" + output raise NoTestResultsToExtractError("No test results found in Maven output:\n" + output)
return False
for match in matches: for match in matches:
tests_run, failures, errors, skipped = map(int, match) tests_run, failures, errors, skipped = map(int, match)
@ -180,10 +165,6 @@ class MavenHandler(BuildHandler):
self.updates["n_tests_skipped"] += skipped self.updates["n_tests_skipped"] += skipped
self.updates["n_tests_passed"] += (tests_run - (failures + errors)) # Calculate passed tests self.updates["n_tests_passed"] += (tests_run - (failures + errors)) # Calculate passed tests
return True
class GradleHandler(BuildHandler): class GradleHandler(BuildHandler):
def __init__(self, repo_path: str, build_file: str, updates: dict) -> None: def __init__(self, repo_path: str, build_file: str, updates: dict) -> None:
super().__init__(repo_path, build_file, updates) super().__init__(repo_path, build_file, updates)
@ -201,7 +182,7 @@ class GradleHandler(BuildHandler):
def container_name(self) -> str: def container_name(self) -> str:
return "crab-gradle" return "crab-gradle"
def extract_test_numbers(self, output: str) -> bool: def extract_test_numbers(self, output: str) -> None:
self.updates["n_tests"] = -1 self.updates["n_tests"] = -1
self.updates["n_tests_passed"] = -1 self.updates["n_tests_passed"] = -1
self.updates["n_tests_failed"] = -1 self.updates["n_tests_failed"] = -1
@ -210,8 +191,7 @@ class GradleHandler(BuildHandler):
test_results_path = os.path.join(self.path, "build/reports/tests/test/index.html") test_results_path = os.path.join(self.path, "build/reports/tests/test/index.html")
if not os.path.exists(test_results_path): if not os.path.exists(test_results_path):
self.updates["error_msg"] = "No test results found (prolly a repo with sub-projects)" raise NoTestResultsToExtractError("No test results found (prolly a repo with sub-projects)")
return False
# Load the HTML file # Load the HTML file
with open(test_results_path, "r") as file: with open(test_results_path, "r") as file:
@ -219,31 +199,38 @@ class GradleHandler(BuildHandler):
test_div = soup.find("div", class_="infoBox", id="tests") test_div = soup.find("div", class_="infoBox", id="tests")
if test_div is None: if test_div is None:
self.updates["error_msg"] = "No test results found (no div.infoBox#tests)" raise NoTestResultsToExtractError("No test results found (no div.infoBox#tests)")
return False
counter_div = test_div.find("div", class_="counter") counter_div = test_div.find("div", class_="counter")
if counter_div is None: if counter_div is None:
self.updates["error_msg"] = "No test results found (not div.counter for tests)" raise NoTestResultsToExtractError("No test results found (not div.counter for tests)")
return False
self.updates["n_tests"] = int(counter_div.text.strip()) self.updates["n_tests"] = int(counter_div.text.strip())
failures_div = soup.find("div", class_="infoBox", id="failures") failures_div = soup.find("div", class_="infoBox", id="failures")
if failures_div is None: if failures_div is None:
self.updates["error_msg"] = "No test results found (no div.infoBox#failures)" raise NoTestResultsToExtractError("No test results found (no div.infoBox#failures)")
return False
counter_div = failures_div.find("div", class_="counter") counter_div = failures_div.find("div", class_="counter")
if counter_div is None: if counter_div is None:
self.updates["error_msg"] = "No test results found (not div.counter for failures)" raise NoTestResultsToExtractError("No test results found (not div.counter for failures)")
return False
self.updates["n_tests_failed"] = int(counter_div.text.strip()) self.updates["n_tests_failed"] = int(counter_div.text.strip())
# Calculate passed tests # Calculate passed tests
self.updates["n_tests_passed"] = self.updates["n_tests"] - self.updates["n_tests_failed"] self.updates["n_tests_passed"] = self.updates["n_tests"] - self.updates["n_tests_failed"]
return True
class NoTestsFoundError(Exception):
pass
class FailedToCompileError(Exception):
pass
class FailedToTestError(Exception):
pass
class NoTestResultsToExtractError(Exception):
pass
def merge_download_lines(lines: list) -> list: def merge_download_lines(lines: list) -> list:
""" """