mirror of
https://github.com/karma-riuk/crab.git
synced 2025-07-05 13:38:12 +02:00
Instead of adding the cache to the dataset as we go through the
repos, add it all before any processing, so that we are sure to keep all the previously saved data.
This commit is contained in:
@ -382,9 +382,7 @@ def process_repo(
|
|||||||
repo = g.get_repo(repo_name)
|
repo = g.get_repo(repo_name)
|
||||||
already_seen_prs = set()
|
already_seen_prs = set()
|
||||||
if repo.full_name in cache:
|
if repo.full_name in cache:
|
||||||
dataset.entries.extend(cache[repo.full_name].values())
|
|
||||||
already_seen_prs = set(cache[repo.full_name].keys())
|
already_seen_prs = set(cache[repo.full_name].keys())
|
||||||
dataset.to_json(args.output)
|
|
||||||
|
|
||||||
prs = repo.get_pulls(state="closed")
|
prs = repo.get_pulls(state="closed")
|
||||||
|
|
||||||
@ -422,6 +420,10 @@ def process_repos(
|
|||||||
Passing it by reference in order to have the latest information, in case of an error
|
Passing it by reference in order to have the latest information, in case of an error
|
||||||
verbose (bool): Whether to be verbose or not
|
verbose (bool): Whether to be verbose or not
|
||||||
"""
|
"""
|
||||||
|
for pr2entry in tqdm(cache.values(), desc="Adding cache in dataset"):
|
||||||
|
dataset.entries.extend(pr2entry.values())
|
||||||
|
dataset.to_json(args.output)
|
||||||
|
|
||||||
with tqdm(total=len(df), desc="Processing repos") as pbar:
|
with tqdm(total=len(df), desc="Processing repos") as pbar:
|
||||||
for _, row in df.iterrows():
|
for _, row in df.iterrows():
|
||||||
repo_name = row["name"]
|
repo_name = row["name"]
|
||||||
|
Reference in New Issue
Block a user