From bf8869e66cadbc1893f3b59ad5ba4cdf488ef3e9 Mon Sep 17 00:00:00 2001 From: Karma Riuk Date: Tue, 1 Apr 2025 15:45:23 +0200 Subject: [PATCH] was accidentally copying over prs that were cached twice --- pull_requests.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pull_requests.py b/pull_requests.py index 1bbca40..df61d7c 100644 --- a/pull_requests.py +++ b/pull_requests.py @@ -382,8 +382,10 @@ def process_repo( cache: dict[str, dict[int, DatasetEntry]] = {}, ): repo = g.get_repo(repo_name) + already_seen_prs = set() if repo.full_name in cache: dataset.entries.extend(cache[repo.full_name].values()) + already_seen_prs = set(cache[repo.full_name].keys()) dataset.to_json(args.output) prs = repo.get_pulls(state="closed") @@ -393,7 +395,7 @@ def process_repo( for pr in prs: pbar.set_postfix({"pr": pr.number, "# new good found": n_good_prs}) try: - if pr.merged_at is None or not is_pull_good(pr): + if pr.merged_at is None or pr.number in already_seen_prs or not is_pull_good(pr): continue n_good_prs += 1