From 04a37030f44eec97aecfd5ca2ba2f38e5f74eba2 Mon Sep 17 00:00:00 2001
From: Karma Riuk <riukkarma@gmail.com>
Date: Tue, 20 May 2025 09:57:04 +0200
Subject: [PATCH] populate dataset from cache only if the cache is non-empty

---
 pull_requests.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pull_requests.py b/pull_requests.py
index 8af69e3..7f76f94 100644
--- a/pull_requests.py
+++ b/pull_requests.py
@@ -454,9 +454,10 @@ def process_repos_parallel(
         archive_destination: Directory for archives
         cache: Optional cache of previously processed PR entries
     """
-    for pr2entry in tqdm(cache.values(), desc="Adding cache in dataset"):
-        dataset.entries.extend(pr2entry.values())
-    dataset.to_json(args.output)
+    if len(cache) > 0:
+        for pr2entry in tqdm(list(cache.values()), desc="Adding cache in dataset"):
+            dataset.entries.extend(pr2entry.values())
+        dataset.to_json(args.output)
 
     repo_names = df["name"]
     free_positions = list(range(1, n_workers + 1))
@@ -529,9 +530,10 @@ def process_repos(
         Passing it by reference in order have the latest information, in case of an error
     verbose (bool): Whether to be verbose or not
     """
-    for pr2entry in tqdm(cache.values(), desc="Adding cache in dataset"):
-        dataset.entries.extend(pr2entry.values())
-    dataset.to_json(args.output)
+    if len(cache) > 0:
+        for pr2entry in tqdm(list(cache.values()), desc="Adding cache in dataset"):
+            dataset.entries.extend(pr2entry.values())
+        dataset.to_json(args.output)
 
     with tqdm(total=len(df), desc="Processing repos") as pbar:
         for _, row in df.iterrows():