added uuid as id

This commit is contained in:
Karma Riuk
2025-05-07 10:18:38 +02:00
parent b3877733cb
commit 40fa958cf8

View File

@ -1,7 +1,7 @@
from dataclasses import dataclass, field from dataclasses import dataclass, field
from enum import Enum from enum import Enum
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import json, argparse, os import json, argparse, os, uuid
from utils import prompt_yes_no from utils import prompt_yes_no
# fmt: off # fmt: off
@ -27,6 +27,7 @@ class Selection:
@dataclass @dataclass
class Metadata: class Metadata:
id: str
repo: str # the name of the repo, with style XXX/YYY repo: str # the name of the repo, with style XXX/YYY
pr_number: int pr_number: int
pr_title: str pr_title: str
@ -49,14 +50,14 @@ class DatasetEntry:
@dataclass @dataclass
class CommentGenEntry: class CommentGenEntry:
id: int id: str
files: Dict[str, str] # filename -> file content files: Dict[str, str] # filename -> file content
diffs: Dict[str, str] # filename -> diff, diffs between the opening of the PR and the comment diffs: Dict[str, str] # filename -> diff, diffs between the opening of the PR and the comment
@staticmethod @staticmethod
def from_entry(entry: DatasetEntry, id: int) -> "CommentGenEntry": def from_entry(entry: DatasetEntry) -> "CommentGenEntry":
return CommentGenEntry( return CommentGenEntry(
id=id, id=entry.metadata.id,
files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()}, files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()},
diffs=entry.diffs_before, diffs=entry.diffs_before,
) )
@ -64,15 +65,15 @@ class CommentGenEntry:
@dataclass @dataclass
class CodeRefinementEntry: class CodeRefinementEntry:
id: int id: str
files: Dict[str, str] # filename -> file content files: Dict[str, str] # filename -> file content
diffs: Dict[str, str] # filename -> diff, diffs between the opening of the PR and the comment diffs: Dict[str, str] # filename -> diff, diffs between the opening of the PR and the comment
comments: List[Comment] comments: List[Comment]
@staticmethod @staticmethod
def from_entry(entry: DatasetEntry, id: int) -> "CodeRefinementEntry": def from_entry(entry: DatasetEntry) -> "CodeRefinementEntry":
return CodeRefinementEntry( return CodeRefinementEntry(
id=id, id=entry.metadata.id,
files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()}, files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()},
diffs=entry.diffs_before, diffs=entry.diffs_before,
comments=entry.comments, comments=entry.comments,
@ -109,7 +110,6 @@ class Dataset:
if entry.metadata.selection and entry.metadata.selection.diff_after_address_change if entry.metadata.selection and entry.metadata.selection.diff_after_address_change
] ]
entry_counter = -1
to_dump = Dataset(entries=entries_to_dump) to_dump = Dataset(entries=entries_to_dump)
def transform_entry(entry: DatasetEntry | Dataset | Any) -> dict | list: def transform_entry(entry: DatasetEntry | Dataset | Any) -> dict | list:
@ -122,13 +122,11 @@ class Dataset:
if isinstance(entry, Dataset): if isinstance(entry, Dataset):
return entry.entries return entry.entries
nonlocal entry_counter
entry_counter += 1
if type_ == OutputType.COMMENT_GEN: if type_ == OutputType.COMMENT_GEN:
return CommentGenEntry.from_entry(entry, entry_counter).__dict__ return CommentGenEntry.from_entry(entry).__dict__
if type_ == OutputType.CODE_REFINEMENT: if type_ == OutputType.CODE_REFINEMENT:
return CodeRefinementEntry.from_entry(entry, entry_counter).__dict__ return CodeRefinementEntry.from_entry(entry).__dict__
with open(filename, "w", encoding="utf-8") as f: with open(filename, "w", encoding="utf-8") as f:
json.dump(to_dump, f, default=transform_entry, indent=4) json.dump(to_dump, f, default=transform_entry, indent=4)
@ -146,6 +144,8 @@ class Dataset:
selection_data = metadata_data["selection"] if "selection" in metadata_data else None selection_data = metadata_data["selection"] if "selection" in metadata_data else None
selection = Selection(**selection_data) if selection_data else None selection = Selection(**selection_data) if selection_data else None
metadata_data["selection"] = selection metadata_data["selection"] = selection
if "id" not in metadata_data:
metadata_data["id"] = uuid.uuid4().hex
metadata = Metadata(**metadata_data) metadata = Metadata(**metadata_data)
if ( if (