added check to see if the repos compile and run tests successfully

This commit is contained in:
Karma Riuk
2025-02-28 16:24:27 +01:00
parent 761095e853
commit e776dbdf6e

View File

@ -1,5 +1,5 @@
import pandas as pd import pandas as pd
import argparse, os, sys, subprocess import argparse, os, sys, subprocess, docker
from tqdm import tqdm from tqdm import tqdm
import shutil import shutil
@ -118,47 +118,121 @@ def remove_dir(dir: str) -> None:
if os.listdir(parent) == []: if os.listdir(parent) == []:
shutil.rmtree(parent) shutil.rmtree(parent)
def create_docker_container(client, image: str = "crab-java-env", repo_path=None, workdir: str = "/repo"):
    """Start a detached, long-lived container in which build commands can be run.

    The container's main process is ``tail -f /dev/null``, which never exits on
    its own, so the container stays up until the caller removes it.

    Args:
        client: Docker client object exposing ``containers.run``.
        image: Name of the image to start. Defaults to ``"crab-java-env"``.
        repo_path: Optional host directory to expose inside the container.
            When given, it is bind-mounted read-write at ``workdir`` and
            ``workdir`` becomes the container's working directory, so commands
            executed later run from the repository root. When ``None`` (the
            default) no volume is mounted.
        workdir: Mount point / working directory used when ``repo_path`` is set.

    Returns:
        The container object returned by ``client.containers.run``.
    """
    run_kwargs = {
        "image": image,
        "command": "tail -f /dev/null",
        "detach": True,
        "tty": True,
    }
    if repo_path is not None:
        # Docker requires an absolute host path for bind mounts.
        run_kwargs["volumes"] = {
            os.path.abspath(repo_path): {"bind": workdir, "mode": "rw"},
        }
        run_kwargs["working_dir"] = workdir
    return client.containers.run(**run_kwargs)
def execute_in_container(container, command):
    """Run ``command`` inside ``container`` and return ``(exit_code, output)``.

    Args:
        container: Container object exposing ``exec_run``.
        command: Command to execute inside the container.

    Returns:
        A tuple ``(exit_code, output)`` where ``output`` is the command's
        output decoded to ``str``.
    """
    # Run non-streamed: with stream=True the Docker SDK reports exit_code as
    # None, so an `exit_code != 0` check would flag every command as failed.
    exec_result = container.exec_run(command)
    raw_output = exec_result.output or b""
    # Build tools may emit bytes that are not valid UTF-8; never crash on them.
    return exec_result.exit_code, raw_output.decode(errors="replace")
if repo in EXCLUSION_LIST:
updates["error_msg"] = "Repo in exclusion list"
if verbose: print(f"Skipping {repo}, in exclusion list")
return updates
def compile_repo(build_file: str, container, updates: dict) -> bool:
    """Try to compile a repository inside a running Docker container.

    The build command is chosen from the build file's name, executed through
    ``execute_in_container``, and the outcome is recorded in ``updates``.

    Args:
        build_file: Path (or name) of the repository's build file.
        container: Running container handed to ``execute_in_container``.
        updates: Dict mutated in place. ``compiled_successfully`` is set once a
            build command has run; ``error_msg`` is set on any failure.

    Returns:
        ``True`` when the build command exits with code 0, ``False`` otherwise
        (including when the build system is not recognised).
    """
    # NOTE(review): build.xml is normally an Ant build file, yet it is routed
    # to Maven here -- confirm this is intended.
    if build_file.endswith(("pom.xml", "build.xml")):
        command = "mvn clean compile"
    elif build_file.endswith("build.gradle"):
        command = "gradle compileJava"
    else:
        updates["error_msg"] = "Unsupported build system for compiling: " + build_file
        return False

    status, build_log = execute_in_container(container, command)
    succeeded = status == 0
    updates["compiled_successfully"] = succeeded
    if not succeeded:
        # Keep the full build log so the failure can be inspected later.
        updates["error_msg"] = build_log
    return succeeded
if build_file is None:
def test_repo(build_file: str, container, updates: dict) -> bool:
    """Run a repository's test suite inside a running Docker container.

    Args:
        build_file: Path (or name) of the repository's build file; its name
            selects the test command.
        container: Running container handed to ``execute_in_container``.
        updates: Dict mutated in place. ``tested_successfully`` is set once a
            test command has run; ``error_msg`` receives the command output, or
            an explanation when the build system is not recognised.

    Returns:
        ``True`` when the test command exits with code 0, ``False`` otherwise
        (including when the build system is not recognised).
    """
    # Run the *test* goals: the compile-only goals never execute any test, so
    # they cannot say anything about whether the suite passes.
    # NOTE(review): build.xml is normally an Ant build file, yet it is routed
    # to Maven here -- confirm this is intended.
    if build_file.endswith(("pom.xml", "build.xml")):
        test_cmd = "mvn clean test"
    elif build_file.endswith("build.gradle"):
        test_cmd = "gradle test"
    else:
        updates["error_msg"] = "Unsupported build system for testing: " + build_file
        return False

    exit_code, output = execute_in_container(container, test_cmd)
    passed = exit_code == 0
    updates["tested_successfully"] = passed
    # NOTE(review): the output is stored under "error_msg" even when the tests
    # pass (kept as-is); presumably for later inspection -- confirm.
    updates["error_msg"] = output
    return passed
def process_row(repo, client, dest: str, force: bool = False, verbose: bool = False) -> dict:
    """Vet one repository and return the column updates describing the outcome.

    Steps, in order: exclusion-list check, optional clone, build-file lookup,
    test-suite detection, compilation and test run. The first step that
    rejects the repository stops the process; from the build-file step onwards
    a rejection also calls ``remove_dir`` on the clone.

    Args:
        repo: Repository name, also used as its path relative to ``dest``.
        client: Docker client passed to ``create_docker_container``.
        dest: Directory under which repositories are cloned.
        force: When true, clone the repository before checking it.
        verbose: When true, print what happens to the repository.

    Returns:
        Dict of updates for this repository. ``good_repo_for_crab`` is set to
        ``True`` only when every step succeeded.
    """
    updates = {}  # Column updates collected for this repository
    # Started only once the cheap checks passed, so rejected repositories
    # never pay for a container.
    container = None

    try:
        with tqdm(total=5, leave=False) as pbar:
            if repo in EXCLUSION_LIST:
                updates["error_msg"] = "Repo in exclusion list"
                if verbose: print(f"Skipping {repo}, in exclusion list")
                return updates

            pbar.set_postfix_str("Cloning...")
            if force:
                clone(repo, dest, updates, verbose=verbose)
            pbar.update(1)

            repo_path = os.path.join(dest, repo)
            if not os.path.exists(repo_path):
                updates["error_msg"] = "Repo not cloned"
                return updates

            pbar.set_postfix_str("Getting build file...")
            build_file = get_build_file(dest, repo, updates)
            if build_file is None:
                if verbose: print(f"Removing {repo}, no build file")
                remove_dir(repo_path)
                return updates
            pbar.update(1)

            pbar.set_postfix_str("Checking for tests...")
            if not has_tests(repo_path, build_file, updates):
                if verbose: print(f"Removing {repo}, no test suites")
                remove_dir(repo_path)
                return updates
            if verbose: print(f"Keeping {repo}")
            pbar.update(1)

            pbar.set_postfix_str("Compiling...")
            # NOTE(review): the container is created from the client alone, so
            # nothing here makes repo_path visible inside it -- confirm the
            # repository is reachable from the container before relying on
            # the compile/test results.
            container = create_docker_container(client)
            if not compile_repo(build_file, container, updates):
                if verbose: print(f"Removing {repo}, failed to compile")
                remove_dir(repo_path)
                return updates
            pbar.update(1)

            pbar.set_postfix_str("Running tests...")
            if not test_repo(build_file, container, updates):
                if verbose: print(f"Removing {repo}, tests failed")
                remove_dir(repo_path)
                return updates
            pbar.update(1)

            # If repo was not removed, then it is a good repo
            updates["good_repo_for_crab"] = True
    finally:
        if container is not None:
            # Forced removal stops the container if needed; a separate kill()
            # raises when the container is no longer running, which would
            # skip the removal and leak the container.
            container.remove(force=True)
    return updates
def clone_repos(file: str, dest: str, force: bool =False, verbose: bool = False) -> None: def clone_repos(file: str, dest: str, force: bool =False, verbose: bool = False) -> None:
@ -178,13 +252,14 @@ def clone_repos(file: str, dest: str, force: bool =False, verbose: bool = False)
df = df[["name"]] df = df[["name"]]
updates_list = [] # Collect updates in a list updates_list = [] # Collect updates in a list
client = docker.from_env()
good_repos = 0 good_repos = 0
try: try:
if verbose: print("Processing repositories") if verbose: print("Processing repositories")
with tqdm(total=len(df)) as pbar: with tqdm(total=len(df)) as pbar:
for i, row in df.iterrows(): for i, row in df.iterrows():
updates = process_row(row, dest, force=force, verbose=verbose) updates = process_row(row["name"], client, dest, force=force, verbose=verbose)
if "good_repo_for_crab" in updates and updates["good_repo_for_crab"]: if "good_repo_for_crab" in updates and updates["good_repo_for_crab"]:
good_repos += 1 good_repos += 1
pbar.update(1) pbar.update(1)
@ -200,13 +275,15 @@ def clone_repos(file: str, dest: str, force: bool =False, verbose: bool = False)
build_system=None, build_system=None,
depth_of_build_file=None, depth_of_build_file=None,
detected_source_of_tests=None, detected_source_of_tests=None,
error_msg=None, compiled_successfully=None,
good_repo_for_crab=False, tested_successfully=None,
n_tests=None, n_tests=None,
n_tests_with_grep=None, n_tests_with_grep=None,
n_tests_passed=None, n_tests_passed=None,
n_tests_failed=None, n_tests_failed=None,
n_tests_skipped=None n_tests_skipped=None,
good_repo_for_crab=False,
error_msg=None,
) )
# Set the new data # Set the new data