From 04a37030f44eec97aecfd5ca2ba2f38e5f74eba2 Mon Sep 17 00:00:00 2001 From: Karma Riuk Date: Tue, 20 May 2025 09:57:04 +0200 Subject: [PATCH] populating cache only if there is any cache --- pull_requests.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pull_requests.py b/pull_requests.py index 8af69e3..7f76f94 100644 --- a/pull_requests.py +++ b/pull_requests.py @@ -454,9 +454,10 @@ def process_repos_parallel( archive_destination: Directory for archives cache: Optional cache of previously processed PR entries """ - for pr2entry in tqdm(cache.values(), desc="Adding cache in dataset"): - dataset.entries.extend(pr2entry.values()) - dataset.to_json(args.output) + if len(cache) > 0: + for pr2entry in tqdm(list(cache.values()), desc="Adding cache in dataset"): + dataset.entries.extend(pr2entry.values()) + dataset.to_json(args.output) repo_names = df["name"] free_positions = list(range(1, n_workers + 1)) @@ -529,9 +530,10 @@ def process_repos( Passing it by reference in order have the latest information, in case of an error verbose (bool): Whether to be verbose or not """ - for pr2entry in tqdm(cache.values(), desc="Adding cache in dataset"): - dataset.entries.extend(pr2entry.values()) - dataset.to_json(args.output) + if len(cache) > 0: + for pr2entry in tqdm(list(cache.values()), desc="Adding cache in dataset"): + dataset.entries.extend(pr2entry.values()) + dataset.to_json(args.output) with tqdm(total=len(df), desc="Processing repos") as pbar: for _, row in df.iterrows():