Fix cached PRs accidentally being copied over twice
This commit is contained in:
Karma Riuk
2025-04-01 15:45:23 +02:00
parent d4dd72469e
commit bf8869e66c

View File

@ -382,8 +382,10 @@ def process_repo(
cache: dict[str, dict[int, DatasetEntry]] = {},
):
repo = g.get_repo(repo_name)
already_seen_prs = set()
if repo.full_name in cache:
dataset.entries.extend(cache[repo.full_name].values())
already_seen_prs = set(cache[repo.full_name].keys())
dataset.to_json(args.output)
prs = repo.get_pulls(state="closed")
@ -393,7 +395,7 @@ def process_repo(
for pr in prs:
pbar.set_postfix({"pr": pr.number, "# new good found": n_good_prs})
try:
if pr.merged_at is None or not is_pull_good(pr):
if pr.merged_at is None or pr.number in already_seen_prs or not is_pull_good(pr):
continue
n_good_prs += 1