mirror of
https://github.com/karma-riuk/crab.git
synced 2025-07-05 05:28:13 +02:00
added uuid as id
This commit is contained in:
24
dataset.py
24
dataset.py
@ -1,7 +1,7 @@
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
import json, argparse, os
|
import json, argparse, os, uuid
|
||||||
from utils import prompt_yes_no
|
from utils import prompt_yes_no
|
||||||
|
|
||||||
# fmt: off
|
# fmt: off
|
||||||
@ -27,6 +27,7 @@ class Selection:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Metadata:
|
class Metadata:
|
||||||
|
id: str
|
||||||
repo: str # the name of the repo, with style XXX/YYY
|
repo: str # the name of the repo, with style XXX/YYY
|
||||||
pr_number: int
|
pr_number: int
|
||||||
pr_title: str
|
pr_title: str
|
||||||
@ -49,14 +50,14 @@ class DatasetEntry:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class CommentGenEntry:
|
class CommentGenEntry:
|
||||||
id: int
|
id: str
|
||||||
files: Dict[str, str] # filename -> file content
|
files: Dict[str, str] # filename -> file content
|
||||||
diffs: Dict[str, str] # filename -> diff, diffs between the opening of the PR and the comment
|
diffs: Dict[str, str] # filename -> diff, diffs between the opening of the PR and the comment
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_entry(entry: DatasetEntry, id: int) -> "CommentGenEntry":
|
def from_entry(entry: DatasetEntry) -> "CommentGenEntry":
|
||||||
return CommentGenEntry(
|
return CommentGenEntry(
|
||||||
id=id,
|
id=entry.metadata.id,
|
||||||
files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()},
|
files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()},
|
||||||
diffs=entry.diffs_before,
|
diffs=entry.diffs_before,
|
||||||
)
|
)
|
||||||
@ -64,15 +65,15 @@ class CommentGenEntry:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class CodeRefinementEntry:
|
class CodeRefinementEntry:
|
||||||
id: int
|
id: str
|
||||||
files: Dict[str, str] # filename -> file content
|
files: Dict[str, str] # filename -> file content
|
||||||
diffs: Dict[str, str] # filename -> diff, diffs between the opening of the PR and the comment
|
diffs: Dict[str, str] # filename -> diff, diffs between the opening of the PR and the comment
|
||||||
comments: List[Comment]
|
comments: List[Comment]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_entry(entry: DatasetEntry, id: int) -> "CodeRefinementEntry":
|
def from_entry(entry: DatasetEntry) -> "CodeRefinementEntry":
|
||||||
return CodeRefinementEntry(
|
return CodeRefinementEntry(
|
||||||
id=id,
|
id=entry.metadata.id,
|
||||||
files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()},
|
files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()},
|
||||||
diffs=entry.diffs_before,
|
diffs=entry.diffs_before,
|
||||||
comments=entry.comments,
|
comments=entry.comments,
|
||||||
@ -109,7 +110,6 @@ class Dataset:
|
|||||||
if entry.metadata.selection and entry.metadata.selection.diff_after_address_change
|
if entry.metadata.selection and entry.metadata.selection.diff_after_address_change
|
||||||
]
|
]
|
||||||
|
|
||||||
entry_counter = -1
|
|
||||||
to_dump = Dataset(entries=entries_to_dump)
|
to_dump = Dataset(entries=entries_to_dump)
|
||||||
|
|
||||||
def transform_entry(entry: DatasetEntry | Dataset | Any) -> dict | list:
|
def transform_entry(entry: DatasetEntry | Dataset | Any) -> dict | list:
|
||||||
@ -122,13 +122,11 @@ class Dataset:
|
|||||||
if isinstance(entry, Dataset):
|
if isinstance(entry, Dataset):
|
||||||
return entry.entries
|
return entry.entries
|
||||||
|
|
||||||
nonlocal entry_counter
|
|
||||||
entry_counter += 1
|
|
||||||
if type_ == OutputType.COMMENT_GEN:
|
if type_ == OutputType.COMMENT_GEN:
|
||||||
return CommentGenEntry.from_entry(entry, entry_counter).__dict__
|
return CommentGenEntry.from_entry(entry).__dict__
|
||||||
|
|
||||||
if type_ == OutputType.CODE_REFINEMENT:
|
if type_ == OutputType.CODE_REFINEMENT:
|
||||||
return CodeRefinementEntry.from_entry(entry, entry_counter).__dict__
|
return CodeRefinementEntry.from_entry(entry).__dict__
|
||||||
|
|
||||||
with open(filename, "w", encoding="utf-8") as f:
|
with open(filename, "w", encoding="utf-8") as f:
|
||||||
json.dump(to_dump, f, default=transform_entry, indent=4)
|
json.dump(to_dump, f, default=transform_entry, indent=4)
|
||||||
@ -146,6 +144,8 @@ class Dataset:
|
|||||||
selection_data = metadata_data["selection"] if "selection" in metadata_data else None
|
selection_data = metadata_data["selection"] if "selection" in metadata_data else None
|
||||||
selection = Selection(**selection_data) if selection_data else None
|
selection = Selection(**selection_data) if selection_data else None
|
||||||
metadata_data["selection"] = selection
|
metadata_data["selection"] = selection
|
||||||
|
if "id" not in metadata_data:
|
||||||
|
metadata_data["id"] = uuid.uuid4().hex
|
||||||
metadata = Metadata(**metadata_data)
|
metadata = Metadata(**metadata_data)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
|
Reference in New Issue
Block a user