mirror of
https://github.com/karma-riuk/crab.git
synced 2025-07-05 13:38:12 +02:00
fixed lazy loading
This commit is contained in:
@ -162,25 +162,6 @@ def process_row(repo, client, dest: str, updates: dict, force: bool = False, ver
|
|||||||
updates["good_repo_for_crab"] = True
|
updates["good_repo_for_crab"] = True
|
||||||
|
|
||||||
def save_df_with_updates(df, updates_list, results_file: str, verbose=False):
|
def save_df_with_updates(df, updates_list, results_file: str, verbose=False):
|
||||||
# Create columns for the new data
|
|
||||||
df = df.assign(
|
|
||||||
processed=False,
|
|
||||||
cloned_successfully=None,
|
|
||||||
build_system=None,
|
|
||||||
depth_of_build_file=None,
|
|
||||||
detected_source_of_tests=None,
|
|
||||||
compiled_successfully=None,
|
|
||||||
tested_successfully=None,
|
|
||||||
n_tests=None,
|
|
||||||
n_tests_with_grep=None,
|
|
||||||
n_tests_passed=None,
|
|
||||||
n_tests_failed=None,
|
|
||||||
n_tests_errors=None,
|
|
||||||
n_tests_skipped=None,
|
|
||||||
good_repo_for_crab=None,
|
|
||||||
error_msg=None,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Set the new data
|
# Set the new data
|
||||||
for index, updates in updates_list:
|
for index, updates in updates_list:
|
||||||
for col, value in updates.items():
|
for col, value in updates.items():
|
||||||
@ -205,6 +186,23 @@ def process_repos(file: str, dest: str, results_file: str, /, lazy: bool = False
|
|||||||
|
|
||||||
# drop all columns besides the name
|
# drop all columns besides the name
|
||||||
df = df[["name"]]
|
df = df[["name"]]
|
||||||
|
df = df.assign(
|
||||||
|
processed=False,
|
||||||
|
cloned_successfully=None,
|
||||||
|
build_system=None,
|
||||||
|
depth_of_build_file=None,
|
||||||
|
detected_source_of_tests=None,
|
||||||
|
compiled_successfully=None,
|
||||||
|
tested_successfully=None,
|
||||||
|
n_tests=None,
|
||||||
|
n_tests_with_grep=None,
|
||||||
|
n_tests_passed=None,
|
||||||
|
n_tests_failed=None,
|
||||||
|
n_tests_errors=None,
|
||||||
|
n_tests_skipped=None,
|
||||||
|
good_repo_for_crab=None,
|
||||||
|
error_msg=None,
|
||||||
|
)
|
||||||
|
|
||||||
updates_list = [] # Collect updates in a list
|
updates_list = [] # Collect updates in a list
|
||||||
client = docker.from_env()
|
client = docker.from_env()
|
||||||
@ -212,18 +210,19 @@ def process_repos(file: str, dest: str, results_file: str, /, lazy: bool = False
|
|||||||
good_repos = 0
|
good_repos = 0
|
||||||
n_processed = 0
|
n_processed = 0
|
||||||
last_i_saved = -1
|
last_i_saved = -1
|
||||||
|
to_be_processed = df
|
||||||
if lazy and results_df is not None:
|
if lazy and results_df is not None:
|
||||||
|
df = results_df.copy()
|
||||||
only_processed = results_df[results_df["processed"]]
|
only_processed = results_df[results_df["processed"]]
|
||||||
good_repos = only_processed[only_processed["good_repo_for_crab"] == True]["good_repo_for_crab"].sum()
|
good_repos = only_processed[only_processed["good_repo_for_crab"] == True]["good_repo_for_crab"].sum()
|
||||||
n_processed = len(only_processed)
|
n_processed = len(only_processed)
|
||||||
last_i_saved = n_processed
|
last_i_saved = n_processed
|
||||||
df = df[~df["name"].isin(only_processed["name"])]
|
to_be_processed = df.loc[~df["name"].isin(only_processed["name"])] # the .loc is to have a view of df and not to make a copy (a copy resets the index and we don't want that)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if verbose: print("Processing repositories")
|
if verbose: print("Processing repositories")
|
||||||
with tqdm(total=len(df)) as pbar:
|
with tqdm(total=len(df)) as pbar:
|
||||||
pbar.update(n_processed)
|
pbar.update(n_processed)
|
||||||
for i, row in df.iterrows():
|
for i, row in to_be_processed.iterrows():
|
||||||
if i % 10 == 0:
|
if i % 10 == 0:
|
||||||
save_df_with_updates(df, updates_list, results_file, verbose=verbose)
|
save_df_with_updates(df, updates_list, results_file, verbose=verbose)
|
||||||
last_i_saved = i
|
last_i_saved = i
|
||||||
@ -233,15 +232,6 @@ def process_repos(file: str, dest: str, results_file: str, /, lazy: bool = False
|
|||||||
"# good repos": f"{good_repos} ({good_repos/n_processed if n_processed > 0 else 0:.2%})",
|
"# good repos": f"{good_repos} ({good_repos/n_processed if n_processed > 0 else 0:.2%})",
|
||||||
"time": datetime.now().strftime("%H:%M:%S")
|
"time": datetime.now().strftime("%H:%M:%S")
|
||||||
})
|
})
|
||||||
if lazy:
|
|
||||||
already_processed_row = results_df[results_df["name"] == row["name"]].iloc[0]
|
|
||||||
already_processed = already_processed_row["processed"]
|
|
||||||
if already_processed: # row was already processed
|
|
||||||
pbar.update(1)
|
|
||||||
n_processed += 1
|
|
||||||
updates_list.append((i, dict(already_processed_row)))
|
|
||||||
good_repos += 1 if already_processed_row["good_repo_for_crab"] else 0
|
|
||||||
continue
|
|
||||||
updates = {}
|
updates = {}
|
||||||
updates_list.append((i, updates))
|
updates_list.append((i, updates))
|
||||||
process_row(row["name"], client, dest, updates, force=force, verbose=verbose)
|
process_row(row["name"], client, dest, updates, force=force, verbose=verbose)
|
||||||
|
Reference in New Issue
Block a user