mirror of
https://github.com/karma-riuk/crab.git
synced 2025-07-05 13:38:12 +02:00
Extracted the process_row function out of clone_repos because it was
becoming quite big
This commit is contained in:
@ -117,33 +117,7 @@ def remove_dir(dir: str) -> None:
|
|||||||
shutil.rmtree(parent)
|
shutil.rmtree(parent)
|
||||||
|
|
||||||
|
|
||||||
def clone_repos(file: str, dest: str, force: bool =False, verbose: bool = False) -> None:
|
def process_row(row, dest: str, force: bool = False, verbose: bool = False):
|
||||||
"""
|
|
||||||
Download the repos listed in the file passed as argument. The downloaded repos will be placed in the folder that is named as the dest argument.
|
|
||||||
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
file (str): The name of the file to download the repos from. Must be a .csv.gz file (downloaded from https://seart-ghs.si.usi.ch)
|
|
||||||
dest (str): The name of the root directory in which to download the repos
|
|
||||||
verbose (bool): If `True`, outputs detailed process information. Defaults to `False`.
|
|
||||||
"""
|
|
||||||
if verbose: print(f"Reading CSV file {file}")
|
|
||||||
df = pd.read_csv(file)
|
|
||||||
|
|
||||||
df["successfully_cloned"] = None
|
|
||||||
df["build_system"] = None
|
|
||||||
df["depth_of_build_file"] = None
|
|
||||||
df["detected_source_of_tests"] = None
|
|
||||||
df["error_msg"] = None
|
|
||||||
df["good_repo_for_crab"] = False
|
|
||||||
df["n_tests"] = None
|
|
||||||
df["n_tests_with_grep"] = None
|
|
||||||
df["n_tests_passed"] = None
|
|
||||||
df["n_tests_failed"] = None
|
|
||||||
df["n_tests_skipped"] = None
|
|
||||||
|
|
||||||
if verbose: print("Cloning repositories")
|
|
||||||
def _process(row)->None:
|
|
||||||
repo = row["name"]
|
repo = row["name"]
|
||||||
if repo in EXCLUSION_LIST:
|
if repo in EXCLUSION_LIST:
|
||||||
row["error_msg"] = "Repo in exclusion list"
|
row["error_msg"] = "Repo in exclusion list"
|
||||||
@ -175,13 +149,38 @@ def clone_repos(file: str, dest: str, force: bool =False, verbose: bool = False)
|
|||||||
# If repo was not removed, then it is a good repo
|
# If repo was not removed, then it is a good repo
|
||||||
row["good_repo_for_crab"] = True
|
row["good_repo_for_crab"] = True
|
||||||
|
|
||||||
|
def clone_repos(file: str, dest: str, force: bool =False, verbose: bool = False) -> None:
|
||||||
|
"""
|
||||||
|
Download the repos listed in the file passed as argument. The downloaded repos will be placed in the folder that is named as the dest argument.
|
||||||
|
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
file (str): The name of the file to download the repos from. Must be a .csv.gz file (downloaded from https://seart-ghs.si.usi.ch)
|
||||||
|
dest (str): The name of the root directory in which to download the repos
|
||||||
|
verbose (bool): If `True`, outputs detailed process information. Defaults to `False`.
|
||||||
|
"""
|
||||||
|
if verbose: print(f"Reading CSV file {file}")
|
||||||
|
df = pd.read_csv(file)
|
||||||
|
|
||||||
|
df["successfully_cloned"] = None
|
||||||
|
df["build_system"] = None
|
||||||
|
df["depth_of_build_file"] = None
|
||||||
|
df["detected_source_of_tests"] = None
|
||||||
|
df["error_msg"] = None
|
||||||
|
df["good_repo_for_crab"] = False
|
||||||
|
df["n_tests"] = None
|
||||||
|
df["n_tests_with_grep"] = None
|
||||||
|
df["n_tests_passed"] = None
|
||||||
|
df["n_tests_failed"] = None
|
||||||
|
df["n_tests_skipped"] = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
df.progress_apply(_process, axis=1)
|
if verbose: print("Processing repositories")
|
||||||
|
df.progress_apply(lambda row: process_row(row, dest, force=force, verbose=verbose), axis=1)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print("Keyboard interrupt detected. Stopping the processing of the repos...")
|
print("Keyboard interrupt detected. Stopping the processing of the repos...")
|
||||||
|
|
||||||
if verbose: print("Writing CSV file")
|
if verbose: print("Writing results...")
|
||||||
df.to_csv("results.csv.gz", index=False)
|
df.to_csv("results.csv.gz", index=False)
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user