Now using the metadata to get the archive name

This commit is contained in:
Karma Riuk
2025-05-14 09:34:59 +02:00
parent 1f91acf6c1
commit ccd962c205

View File

@ -11,6 +11,7 @@ from tqdm import tqdm
from datetime import datetime, timedelta from datetime import datetime, timedelta
from dataset import ( from dataset import (
ArchiveState,
Comment, Comment,
Dataset, Dataset,
DatasetEntry, DatasetEntry,
@ -202,9 +203,7 @@ def get_comments(pr: PullRequest) -> list[Comment]:
return ret return ret
def archive_repo( def archive_repo(repo_path: str, metadata: Metadata, destination: str, state: ArchiveState) -> None:
repo_path: str, repo_name: str, pr_number: int, destination: str, post_fix: str
) -> None:
""" """
Archives the repo at the specified path, including only the files tracked by git. Archives the repo at the specified path, including only the files tracked by git.
The archive is stored in the destination directory with a filename based on the PR number. The archive is stored in the destination directory with a filename based on the PR number.
@ -212,7 +211,7 @@ def archive_repo(
if not os.path.exists(destination): if not os.path.exists(destination):
os.makedirs(destination) os.makedirs(destination)
archive_name = f"{repo_name.replace('/', '_')}_{pr_number}_{post_fix}.tar.gz" archive_name = metadata.archive_name(state)
archive_path = os.path.join(destination, archive_name) archive_path = os.path.join(destination, archive_name)
result = run_git_cmd(["ls-files"], repo_path) result = run_git_cmd(["ls-files"], repo_path)
@ -237,8 +236,7 @@ def process_pull(
dataset.entries.append(cache[repo.full_name][pr.number]) dataset.entries.append(cache[repo.full_name][pr.number])
return return
entry = DatasetEntry( metadata = Metadata(
metadata=Metadata(
uuid.uuid4().hex, uuid.uuid4().hex,
repo.full_name, repo.full_name,
pr.number, pr.number,
@ -246,7 +244,9 @@ def process_pull(
pr.body, pr.body,
pr.merge_commit_sha, pr.merge_commit_sha,
reason_for_failure="Was still being processed", reason_for_failure="Was still being processed",
), )
entry = DatasetEntry(
metadata=metadata,
files={}, files={},
diffs_before={}, diffs_before={},
comments=[], comments=[],
@ -284,7 +284,7 @@ def process_pull(
("Checkout out base commit...", lambda: checkout(repo_path, pr.base.sha, pr.number)), ("Checkout out base commit...", lambda: checkout(repo_path, pr.base.sha, pr.number)),
( (
"Archiving the repo...", "Archiving the repo...",
lambda: archive_repo(repo_path, repo.full_name, pr.number, archive_destination, "base"), lambda: archive_repo(repo_path, metadata, archive_destination, ArchiveState.BASE),
), ),
( (
"Checkout out merge commit...", "Checkout out merge commit...",
@ -292,9 +292,7 @@ def process_pull(
), ),
( (
"Archiving the repo...", "Archiving the repo...",
lambda: archive_repo( lambda: archive_repo(repo_path, metadata, archive_destination, ArchiveState.MERGED),
repo_path, repo.full_name, pr.number, archive_destination, "merged"
),
), ),
] ]