the input to code refinement now ignores paraphrases

This commit is contained in:
Karma Riuk
2025-06-10 20:45:51 +02:00
parent 429fe9b060
commit f5bdfd1a1b

View File

@ -111,12 +111,28 @@ class CommentGenEntry:
diffs=entry.diffs_before, diffs=entry.diffs_before,
) )
@dataclass
class CodeRefinementComment:
    """Review comment reduced to four fields: body, file, from_ and to.

    Instances are normally built with ``from_comment``, which copies exactly
    these four attributes from an existing comment object; any other attribute
    of the source comment is not carried over.
    """

    body: str  # copied from ``comment.body``
    file: str  # copied from ``comment.file``
    from_: int  # copied from ``comment.from_``
    to: int  # copied from ``comment.to``

    @classmethod
    def from_comment(cls, comment: Comment) -> "CodeRefinementComment":
        """Build an instance from ``comment``.

        Args:
            comment: object exposing ``body``, ``file``, ``from_`` and ``to``.

        Returns:
            A new instance of ``cls`` holding those four values.
        """
        # Collect the retained attributes first, then hand them to the
        # constructor as keyword arguments.
        kept = {
            "body": comment.body,
            "file": comment.file,
            "from_": comment.from_,
            "to": comment.to,
        }
        return cls(**kept)
@dataclass @dataclass
class CodeRefinementEntry: class CodeRefinementEntry:
id: str id: str
files: Dict[str, str] # filename -> file content files: Dict[str, str] # filename -> file content
diffs: Dict[str, str] # filename -> diff, diffs between the opening of the PR and the comment diffs: Dict[str, str] # filename -> diff, diffs between the opening of the PR and the comment
comments: List[Comment] comments: List[CodeRefinementComment]
@staticmethod @staticmethod
def from_entry(entry: DatasetEntry) -> "CodeRefinementEntry": def from_entry(entry: DatasetEntry) -> "CodeRefinementEntry":
@ -124,7 +140,7 @@ class CodeRefinementEntry:
id=entry.metadata.id, id=entry.metadata.id,
files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()}, files={fname: fdata.content_before_pr for fname, fdata in entry.files.items()},
diffs=entry.diffs_before, diffs=entry.diffs_before,
comments=entry.comments, comments=[CodeRefinementComment.from_comment(c) for c in entry.comments],
) )
# fmt: on # fmt: on