fix: PRs that were already cached were accidentally being copied into the dataset twice
This commit is contained in:
Karma Riuk
2025-04-01 15:45:23 +02:00
parent d4dd72469e
commit bf8869e66c

View File

@ -382,8 +382,10 @@ def process_repo(
cache: dict[str, dict[int, DatasetEntry]] = {}, cache: dict[str, dict[int, DatasetEntry]] = {},
): ):
repo = g.get_repo(repo_name) repo = g.get_repo(repo_name)
already_seen_prs = set()
if repo.full_name in cache: if repo.full_name in cache:
dataset.entries.extend(cache[repo.full_name].values()) dataset.entries.extend(cache[repo.full_name].values())
already_seen_prs = set(cache[repo.full_name].keys())
dataset.to_json(args.output) dataset.to_json(args.output)
prs = repo.get_pulls(state="closed") prs = repo.get_pulls(state="closed")
@ -393,7 +395,7 @@ def process_repo(
for pr in prs: for pr in prs:
pbar.set_postfix({"pr": pr.number, "# new good found": n_good_prs}) pbar.set_postfix({"pr": pr.number, "# new good found": n_good_prs})
try: try:
if pr.merged_at is None or not is_pull_good(pr): if pr.merged_at is None or pr.number in already_seen_prs or not is_pull_good(pr):
continue continue
n_good_prs += 1 n_good_prs += 1