using exceptions to interrupt the flow of

execution instead of having to propagate a boolean
2025-10-13 19:58:02 +02:00 · 2025-03-14 16:10:54 +01:00
parent b04d5b410a
commit 082f6c6f71
2 changed files with 67 additions and 63 deletions
--- a/clone_repos.py
+++ b/clone_repos.py
@@ -4,7 +4,7 @@ from tqdm import tqdm
 import shutil
 from datetime import datetime
-from handlers import get_build_handler
+from handlers import FailedToCompileError, FailedToTestError, NoTestsFoundError, NoTestResultsToExtractError, get_build_handler
 from utils import clone
 tqdm.pandas()
@@ -58,31 +58,48 @@ def process_row(repo, client, dest: str, updates: dict, force: bool = False, ver
        build_handler.set_client(client)
        with build_handler:
-            pbar.set_postfix_str("Checking for tests...")
+            try:
-            if not build_handler.has_tests():
+                pbar.set_postfix_str("Checking for tests...")
-                if verbose: print(f"Removing {repo}, no test suites")
+                build_handler.check_for_tests()
                pbar.update(1)
                pbar.set_postfix_str("Compiling...")
                build_handler.compile_repo()
                updates["compiled_successfully"] = True
                pbar.update(1)
                pbar.set_postfix_str("Running tests...")
                build_handler.test_repo()
                updates["tested_successfully"] = True
                pbar.update(1)
                build_handler.clean_repo()
                # If repo was not removed, then it is a good repo
                updates["good_repo_for_crab"] = True
            except NoTestsFoundError as e:
                updates["error_msg"] = str(e)
                if verbose: print(f"Removing {repo}, error: no tests found")
                remove_dir(repo_path)
                return
-            if verbose: print(f"Keeping {repo}")
+            except FailedToCompileError as e:
-            pbar.update(1)
+                updates["error_msg"] = str(e)
-
+                updates["compiled_successfully"] = False
-            pbar.set_postfix_str("Compiling...")
+                if verbose: print(f"Removing {repo}, error: failed to compile")
            if not build_handler.compile_repo():
                if verbose: print(f"Removing {repo}, failed to compile")
                remove_dir(repo_path)
                return
-            pbar.update(1)
+            except FailedToTestError as e:
-
+                updates["error_msg"] = str(e)
-            pbar.set_postfix_str("Running tests...")
+                updates["tested_successfully"] = False
-            if not build_handler.test_repo():
+                if verbose: print(f"Removing {repo}, error: failed to run tests")
-                if verbose: print(f"Removing {repo}, failed to run tests")
+                remove_dir(repo_path)
                return
            except NoTestResultsToExtractError as e:
                updates["error_msg"] = str(e)
                if verbose: print(f"Removing {repo}, error: failed to extract test results")
                remove_dir(repo_path)
                return
            build_handler.clean_repo()
            pbar.update(1)
            # If repo was not removed, then it is a good repo
            updates["good_repo_for_crab"] = True
 def save_df_with_updates(df, updates_list, results_file: str, verbose=False):
   # Set the new data
--- a/handlers.py
+++ b/handlers.py
@@ -32,19 +32,19 @@ class BuildHandler(ABC):
        self.container.remove()
-    def has_tests(self) -> bool:
+    def check_for_tests(self) -> None:
        with open(os.path.join(self.path, self.build_file), "r") as f:
            content = f.read()
            for library in ["junit", "testng", "mockito"]:
                if library in content:
                    self.updates["detected_source_of_tests"] = library + " library in build file"
-                    return True
+                    return
            for keyword in ["testImplementation", "functionalTests", "bwc_tests_enabled"]:
                if keyword in content:
                    self.updates["detected_source_of_tests"] = keyword + " keyword in build file"
-                    return False
+                    return
        test_dirs = [
            "src/test/java",
@@ -55,12 +55,10 @@ class BuildHandler(ABC):
        for td in test_dirs:
            if os.path.exists(os.path.join(self.path, td)):
                self.updates["detected_source_of_tests"] = td + " dir exists in repo"
                return True
-        self.updates["error_msg"] = "No tests found"
+        raise NoTestsFoundError("No tests found")
        return False
-    def compile_repo(self) -> bool:
+    def compile_repo(self) -> None:
        def timeout_handler(signum, frame):
           raise TimeoutError("Tests exceeded time limit")
@@ -71,21 +69,14 @@ class BuildHandler(ABC):
            exec_result = self.container.exec_run(self.compile_cmd())
            output = clean_output(exec_result.output)
            if exec_result.exit_code != 0:
-                self.updates["compiled_successfully"] = False
+                raise FailedToCompileError(output)
                self.updates["error_msg"] = output
                return False
            self.updates["compiled_successfully"] = True 
            return True
        except TimeoutError:
            self.updates["compiled_successfully"] = False
            self.updates["error_msg"] = "Compile process killed due to exceeding the 1-hour time limit"
            return False
        finally:
            signal.alarm(0)  # Cancel the alarm
-    def test_repo(self) -> bool:
+    def test_repo(self) -> None:
        def timeout_handler(signum, frame):
           raise TimeoutError("Tests exceeded time limit")
@@ -96,19 +87,14 @@ class BuildHandler(ABC):
            exec_result = self.container.exec_run(self.test_cmd())
            output = clean_output(exec_result.output)
            if exec_result.exit_code != 0:
-                self.updates["tested_successfully"] = False
+                raise FailedToTestError(output)
                self.updates["error_msg"] = output
                return False
-            self.updates["tested_successfully"] = True
+            self.extract_test_numbers(output)
            self.updates["error_msg"] = output
            return self.extract_test_numbers(output)
        except TimeoutError:
            self.updates["tested_successfully"] = False
            self.updates["error_msg"] = "Test process killed due to exceeding the 1-hour time limit"
-            return False
+            return
        finally:
            signal.alarm(0)  # Cancel the alarm
@@ -126,7 +112,7 @@ class BuildHandler(ABC):
        pass
    @abstractmethod
-    def extract_test_numbers(self, output: str) -> bool:
+    def extract_test_numbers(self, output: str) -> None:
        pass
    @abstractmethod
@@ -157,7 +143,7 @@ class MavenHandler(BuildHandler):
    def container_name(self) -> str:
        return "crab-maven"
-    def extract_test_numbers(self, output: str) -> bool:
+    def extract_test_numbers(self, output: str) -> None:
        pattern = r"\[INFO\] Results:\n\[INFO\]\s*\n\[INFO\] Tests run: (\d+), Failures: (\d+), Errors: (\d+), Skipped: (\d+)"
        matches = re.findall(pattern, output)
@@ -169,8 +155,7 @@ class MavenHandler(BuildHandler):
        self.updates["n_tests_skipped"] = 0
        if len(matches) == 0:
-            self.updates["error_msg"] = "No test results found in Maven output:\n" + output
+            raise NoTestResultsToExtractError("No test results found in Maven output:\n" + output)
            return False
        for match in matches:
            tests_run, failures, errors, skipped = map(int, match)
@@ -180,10 +165,6 @@ class MavenHandler(BuildHandler):
            self.updates["n_tests_skipped"] += skipped
            self.updates["n_tests_passed"] += (tests_run - (failures + errors))  # Calculate passed tests
        return True
 class GradleHandler(BuildHandler):
    def __init__(self, repo_path: str, build_file: str, updates: dict) -> None:
        super().__init__(repo_path, build_file, updates)
@@ -201,7 +182,7 @@ class GradleHandler(BuildHandler):
    def container_name(self) -> str:
        return "crab-gradle"
-    def extract_test_numbers(self, output: str) -> bool:
+    def extract_test_numbers(self, output: str) -> None:
        self.updates["n_tests"] = -1
        self.updates["n_tests_passed"] = -1
        self.updates["n_tests_failed"] = -1
@@ -210,8 +191,7 @@ class GradleHandler(BuildHandler):
        test_results_path = os.path.join(self.path, "build/reports/tests/test/index.html")
        if not os.path.exists(test_results_path):
-            self.updates["error_msg"] = "No test results found (prolly a repo with sub-projects)"
+            raise NoTestResultsToExtractError("No test results found (prolly a repo with sub-projects)")
            return False
        # Load the HTML file
        with open(test_results_path, "r") as file:
@@ -219,31 +199,38 @@ class GradleHandler(BuildHandler):
            test_div = soup.find("div", class_="infoBox", id="tests")
            if test_div is None:
-                self.updates["error_msg"] = "No test results found (no div.infoBox#tests)"
+                raise NoTestResultsToExtractError("No test results found (no div.infoBox#tests)")
                return False
            counter_div = test_div.find("div", class_="counter")
            if counter_div is None:
-                self.updates["error_msg"] = "No test results found (not div.counter for tests)"
+                raise NoTestResultsToExtractError("No test results found (not div.counter for tests)")
                return False
            self.updates["n_tests"] = int(counter_div.text.strip())
            failures_div = soup.find("div", class_="infoBox", id="failures")
            if failures_div is None:
-                self.updates["error_msg"] = "No test results found (no div.infoBox#failures)"
+                raise NoTestResultsToExtractError("No test results found (no div.infoBox#failures)")
                return False
            counter_div = failures_div.find("div", class_="counter")
            if counter_div is None:
-                self.updates["error_msg"] = "No test results found (not div.counter for failures)"
+                raise NoTestResultsToExtractError("No test results found (not div.counter for failures)")
                return False
            self.updates["n_tests_failed"] = int(counter_div.text.strip())
            # Calculate passed tests
            self.updates["n_tests_passed"] = self.updates["n_tests"] - self.updates["n_tests_failed"]
-        return True
+
 class NoTestsFoundError(Exception):
    pass
 class FailedToCompileError(Exception):
    pass
 class FailedToTestError(Exception):
    pass
 class NoTestResultsToExtractError(Exception):
    pass
 def merge_download_lines(lines: list) -> list:
    """