Comments now get evaluated correctly, not just

based on BLEU score but also on file location and line number
2025-07-24 13:47:48 +02:00 · 2025-05-21 13:29:40 +02:00
parent 623532f5af
commit dd58d962ac
6 changed files with 109 additions and 13 deletions
--- a/public/css/style.css
+++ b/public/css/style.css
@@ -108,6 +108,7 @@ table thead {

 table thead th {
    padding: 8px;
+    white-space: nowrap;
 }

 table thead th:hover {
@@ -146,8 +147,18 @@ table tbody td:nth-child(1) {
    white-space: nowrap;
 }

-/* style score column values */
+/* style correct file column values */
 .results-container#comment table tbody td:nth-child(3) {
+    text-align: center;
+}
+
+/* style distance column values */
+.results-container#comment table tbody td:nth-child(4) {
+    text-align: right;
+}
+
+/* style score column values */
+.results-container#comment table tbody td:nth-child(5) {
    text-align: right;
 }

--- a/public/index.html
+++ b/public/index.html
@@ -92,6 +92,8 @@
                    <tr>
                        <th>id</th>
                        <th>Proposed comment</th>
+                        <th>Correct file</th>
+                        <th>Distance</th>
                        <th>Max bleu score</th>
                    </tr>
                </thead>
--- a/public/js/index.js
+++ b/public/js/index.js
@@ -37,14 +37,18 @@ function populateCommentTable(results) {
        const row = tbody.insertRow();
        const idCell = row.insertCell();
        const commentCell = row.insertCell();
+        const pathCell = row.insertCell();
+        const distanceCell = row.insertCell();
        const scoreCell = row.insertCell();
        const span = document.createElement("span");

        idCell.textContent = id;
        span.className = "comment-cell";
-        span.textContent = info["proposed_comment"];
+        span.textContent = info["proposed_comment"].body;
        commentCell.appendChild(span);
        scoreCell.textContent = info["max_bleu_score"].toFixed(2);
+        pathCell.textContent = info["correct_file"] ? tick : cross;
+        distanceCell.textContent = info["distance"];
    });
 }

--- a/src/routes/answers.py
+++ b/src/routes/answers.py
@@ -1,6 +1,7 @@
 # routes/answers.py
 from typing import Callable
 from flask import Blueprint, request, jsonify, current_app, url_for
+from utils.dataset import CommentGenSubmission
 from utils.errors import InvalidJsonFormatError
 from utils.process_data import evaluate_comments, evaluate_refinement
 from utils.observer import SocketObserver, Status, Subject
@@ -14,16 +15,22 @@ router = Blueprint('answers', __name__, url_prefix='/answers')
 ALLOWED_EXT = {'json'}


-def validate_json_format_for_comment_gen(data: str) -> dict[str, str]:
+def validate_json_format_for_comment_gen(data: str) -> dict[str, CommentGenSubmission]:
    try:
        obj = json.loads(data)
+        ret = {}
        if not isinstance(obj, dict):
            raise InvalidJsonFormatError("Submitted json doesn't contain an object")
-        if not all(isinstance(v, str) for v in obj.values()):
-            raise InvalidJsonFormatError(
-                "Submitted json object must only be str -> str. Namely id -> comment"
-            )
-        return obj
+
+        for id, submission in obj.items():
+            if not isinstance(id, str):
+                raise InvalidJsonFormatError("The id of a particular submission must be a string")
+            if not isinstance(submission, dict):
+                raise InvalidJsonFormatError(
+                    "A particular submission must be a dictionary of type {'path' -> str, 'line_from' -> int, 'line_to' -> int, 'body' -> str}"
+                )
+            ret[id] = CommentGenSubmission.json_parse(submission)
+        return ret
    except InvalidJsonFormatError as e:
        raise e
    except Exception:
--- a/src/utils/dataset.py
+++ b/src/utils/dataset.py
@@ -3,6 +3,8 @@ from enum import Enum
 from typing import Any, Dict, List, Optional, Union
 import json, uuid

+from utils.errors import InvalidJsonFormatError
+
 # fmt: off
@dataclass
 class FileData:
@@ -48,6 +50,29 @@ class Metadata:
            return f"{self.id}_{state.value}.tar.gz"
        return f"{self.repo.replace('/', '_')}_{self.pr_number}_{state.value}.tar.gz"

+@dataclass
+class CommentGenSubmission:
+    path: str
+    line_from: int
+    line_to: Optional[int]
+    body: str
+
+    @classmethod
+    def json_parse(cls, data) -> "CommentGenSubmission":
+        if not isinstance(data, dict):
+            raise InvalidJsonFormatError("Submitted json doesn't contain an object")
+        if not all(k in data and isinstance(data[k], str) for k in ["path", "body"]):
+            raise InvalidJsonFormatError("Submitted json doesn't contain the required fields")
+        if not all(k in data and isinstance(data[k], (int, type(None))) for k in ["line_from", "line_to"]):
+            raise InvalidJsonFormatError("Submitted json doesn't contain the required fields")
+
+        return cls(
+            path=data["path"],
+            line_from=data["line_from"],
+            line_to=data.get("line_to"),
+            body=data["body"],
+        )
+
@dataclass
 class DatasetEntry:
    metadata: Metadata
--- a/src/utils/process_data.py
+++ b/src/utils/process_data.py
@@ -1,9 +1,10 @@
+import json
 import sys
 from typing_extensions import Callable
 from utils.build_handlers import get_build_handler
 from .paths import get_project_path
 from sacrebleu import sentence_bleu as bleu
-from utils.dataset import ArchiveState, Dataset
+from utils.dataset import ArchiveState, Comment, CommentGenSubmission, Dataset

 REFERENCE_MAP = Dataset.from_json(
    str(get_project_path('../data/dataset.json'))
@@ -12,32 +13,78 @@ REFERENCE_MAP = Dataset.from_json(
 ARCHIVES_ROOT = str(get_project_path('../data/archives'))


+def comment_distance(submission: CommentGenSubmission, entry: Comment):
+    if entry.from_ is None and entry.to is None:
+        return "NA"
+    if submission.line_from is None and submission.line_to is None:
+        return "NA"
+
+    # Collapse missing endpoints to the one defined endpoint
+    # For entry:
+    start1 = entry.from_ if entry.from_ is not None else entry.to
+    end1 = entry.to if entry.to is not None else entry.from_
+    # For submission:
+    start2 = submission.line_from if submission.line_from is not None else submission.line_to
+    end2 = submission.line_to if submission.line_to is not None else submission.line_from
+
+    # Now both start1,end1 and start2,end2 are non-None
+    # Normalize each range so that start <= end, in case from > to:
+    if start1 > end1:
+        start1, end1 = end1, start1
+    if start2 > end2:
+        start2, end2 = end2, start2
+
+    # Check for overlap
+    if end1 >= start2 and end2 >= start1:
+        return 0
+
+    # Otherwise compute gap
+    if end1 < start2:
+        return start2 - end1
+    else:  # end2 < start1
+        return start1 - end2
+
+
 def evaluate_comments(
-    answers: dict[str, str],
+    answers: dict[str, CommentGenSubmission],
    percent_cb: Callable[[float], None] = lambda _: None,
    complete_cb: Callable[[dict], None] = lambda _: None,
 ):
+    # print("Started processing comments...")
    total = len(answers)
    results = {}
-    for i, (id_, gen) in enumerate(answers.items(), 1):
+    for i, (id_, submission) in enumerate(answers.items(), 1):
+        # print(f"[INFO] Processing {id_} ({i}/{total}: {i/total:.2%})...")
        if id_ not in REFERENCE_MAP:
            print(f"[WARNING] skipping {id} since it is not present in dataset", file=sys.stderr)
            continue
        entry = REFERENCE_MAP[id_]
        max_score = 0
        scores = []
+        # print(f"[INFO] Processing paraphrases...")
        for p in [entry.comments[0].body] + entry.comments[0].paraphrases:
-            score = round(bleu(gen, [p]).score, 2)
+            score = round(bleu(submission.body, [p]).score, 2)
            scores.append(score)
            max_score = max(max_score, score)

+        correct_file = submission.path == entry.comments[0].file
+        # print(f"[INFO] Getting distance...")
+        if correct_file:
+            distance = comment_distance(submission, entry.comments[0])
+        else:
+            distance = "NA"
+
+        # print(f"[INFO] Populating result...")
        results[id_] = {
            'max_bleu_score': max_score,
            'bleu_scores': scores,
-            'proposed_comment': gen,
+            'proposed_comment': submission.__dict__,
+            'correct_file': correct_file,
+            'distance': distance,
        }
        percent_cb(int(i / total * 100))

+    # print(f"[INFO] Sending results...")
    complete_cb(results)
    return results