now comments get evaluated correctly, not just

based on BLEU score but also on the file location and line number
2025-07-05 06:08:13 +02:00 · 2025-05-21 13:29:40 +02:00
parent 623532f5af
commit dd58d962ac
6 changed files with 109 additions and 13 deletions
--- a/src/utils/dataset.py
+++ b/src/utils/dataset.py
@@ -3,6 +3,8 @@ from enum import Enum
 from typing import Any, Dict, List, Optional, Union
 import json, uuid

+from utils.errors import InvalidJsonFormatError
+
 # fmt: off
@dataclass
 class FileData:
@@ -48,6 +50,29 @@ class Metadata:
            return f"{self.id}_{state.value}.tar.gz"
        return f"{self.repo.replace('/', '_')}_{self.pr_number}_{state.value}.tar.gz"

+@dataclass
+class CommentGenSubmission:
+    """A submitted comment plus the path and line fields that locate it.
+
+    The fields mirror the keys of the submitted JSON object: ``path``,
+    ``line_from``, ``line_to`` (may be ``None``) and ``body``. Instances are
+    normally built from decoded JSON through :meth:`json_parse`.
+    """
+
+    path: str
+    line_from: int
+    line_to: Optional[int]
+    body: str
+
+    @classmethod
+    def json_parse(cls, data) -> "CommentGenSubmission":
+        """Validate a decoded JSON value and build a submission from it.
+
+        Args:
+            data: The decoded JSON value. It must be a dict with string
+                ``path`` and ``body`` entries and an integer ``line_from``.
+                ``line_to`` may be an integer, ``None``, or omitted.
+
+        Returns:
+            The parsed submission; ``line_to`` is ``None`` when it is missing
+            or null in ``data``.
+
+        Raises:
+            InvalidJsonFormatError: If ``data`` is not a dict, or a required
+                field is missing or has the wrong type.
+        """
+        if not isinstance(data, dict):
+            raise InvalidJsonFormatError("Submitted json doesn't contain an object")
+        # dict.get returns None for a missing key, and None is never a str,
+        # so this covers both "missing" and "wrong type".
+        if not all(isinstance(data.get(k), str) for k in ("path", "body")):
+            raise InvalidJsonFormatError("Submitted json doesn't contain the required fields")
+
+        line_from = data.get("line_from")
+        line_to = data.get("line_to")
+        # line_from is mandatory (annotated int); only line_to may be absent
+        # or null (annotated Optional[int]).
+        if not cls._is_line_number(line_from):
+            raise InvalidJsonFormatError("Submitted json doesn't contain the required fields")
+        if line_to is not None and not cls._is_line_number(line_to):
+            raise InvalidJsonFormatError("Submitted json doesn't contain the required fields")
+
+        return cls(
+            path=data["path"],
+            line_from=line_from,
+            line_to=line_to,
+            body=data["body"],
+        )
+
+    @staticmethod
+    def _is_line_number(value) -> bool:
+        """Return True for real integers; bool is excluded although it subclasses int."""
+        return isinstance(value, int) and not isinstance(value, bool)
+
@dataclass
 class DatasetEntry:
    metadata: Metadata