now comments get evaluated correctly, not just

based on BLEU score but also on the location of the file and the line number
This commit is contained in:
Karma Riuk
2025-05-21 13:29:40 +02:00
parent 623532f5af
commit dd58d962ac
6 changed files with 109 additions and 13 deletions

View File

@ -108,6 +108,7 @@ table thead {
table thead th {
padding: 8px;
white-space: nowrap;
}
table thead th:hover {
@ -146,8 +147,18 @@ table tbody td:nth-child(1) {
white-space: nowrap;
}
/* style score column values */
/* style correct file column values */
.results-container#comment table tbody td:nth-child(3) {
text-align: center;
}
/* style distance column values */
.results-container#comment table tbody td:nth-child(4) {
text-align: right;
}
/* style score column values */
.results-container#comment table tbody td:nth-child(5) {
text-align: right;
}

View File

@ -92,6 +92,8 @@
<tr>
<th>id</th>
<th>Proposed comment</th>
<th>Correct file</th>
<th>Distance</th>
<th>Max bleu score</th>
</tr>
</thead>

View File

@ -37,14 +37,18 @@ function populateCommentTable(results) {
const row = tbody.insertRow();
const idCell = row.insertCell();
const commentCell = row.insertCell();
const pathCell = row.insertCell();
const distanceCell = row.insertCell();
const scoreCell = row.insertCell();
const span = document.createElement("span");
idCell.textContent = id;
span.className = "comment-cell";
span.textContent = info["proposed_comment"];
span.textContent = info["proposed_comment"].body;
commentCell.appendChild(span);
scoreCell.textContent = info["max_bleu_score"].toFixed(2);
pathCell.textContent = info["correct_file"] ? tick : cross;
distanceCell.textContent = info["distance"];
});
}

View File

@ -1,6 +1,7 @@
# routes/answers.py
from typing import Callable
from flask import Blueprint, request, jsonify, current_app, url_for
from utils.dataset import CommentGenSubmission
from utils.errors import InvalidJsonFormatError
from utils.process_data import evaluate_comments, evaluate_refinement
from utils.observer import SocketObserver, Status, Subject
@ -14,16 +15,22 @@ router = Blueprint('answers', __name__, url_prefix='/answers')
ALLOWED_EXT = {'json'}
def validate_json_format_for_comment_gen(data: str) -> dict[str, str]:
def validate_json_format_for_comment_gen(data: str) -> dict[str, CommentGenSubmission]:
try:
obj = json.loads(data)
ret = {}
if not isinstance(obj, dict):
raise InvalidJsonFormatError("Submitted json doesn't contain an object")
if not all(isinstance(v, str) for v in obj.values()):
raise InvalidJsonFormatError(
"Submitted json object must only be str -> str. Namely id -> comment"
)
return obj
for id, submission in obj.items():
if not isinstance(id, str):
raise InvalidJsonFormatError("The id of a particular submission must be a string")
if not isinstance(submission, dict):
raise InvalidJsonFormatError(
"A particular submission must be a dictionary of type {'path' -> str, 'line_from' -> int, 'line_to' -> int, 'body' -> str}"
)
ret[id] = CommentGenSubmission.json_parse(submission)
return ret
except InvalidJsonFormatError as e:
raise e
except Exception:

View File

@ -3,6 +3,8 @@ from enum import Enum
from typing import Any, Dict, List, Optional, Union
import json, uuid
from utils.errors import InvalidJsonFormatError
# fmt: off
@dataclass
class FileData:
@ -48,6 +50,29 @@ class Metadata:
return f"{self.id}_{state.value}.tar.gz"
return f"{self.repo.replace('/', '_')}_{self.pr_number}_{state.value}.tar.gz"
@dataclass
class CommentGenSubmission:
    """One submitted review comment together with the location it targets.

    Instances are normally built from decoded JSON via `json_parse`, which
    validates the field types before constructing the object.
    """

    # Path of the file the comment refers to.
    path: str
    # First line of the commented range; `json_parse` accepts null here.
    line_from: Optional[int]
    # Last line of the commented range; may be null or omitted entirely.
    line_to: Optional[int]
    # Text of the proposed comment.
    body: str

    @classmethod
    def json_parse(cls, data) -> "CommentGenSubmission":
        """Validate a decoded JSON object and build a submission from it.

        Args:
            data: The decoded JSON value for one submission. It must be a dict
                with string values for "path" and "body", an int-or-null value
                for "line_from", and optionally an int-or-null "line_to".

        Returns:
            The corresponding `CommentGenSubmission`.

        Raises:
            InvalidJsonFormatError: If `data` is not a dict, a required field
                is missing, or a field has the wrong type.
        """
        if not isinstance(data, dict):
            raise InvalidJsonFormatError("Submitted json doesn't contain an object")

        # A missing key yields None from .get(), which is not a str, so this
        # single check covers both "absent" and "wrong type".
        if not all(isinstance(data.get(k), str) for k in ("path", "body")):
            raise InvalidJsonFormatError("Submitted json doesn't contain the required fields")

        # "line_from" must be present (possibly null); "line_to" is optional,
        # matching the Optional field and the .get() used below.
        if "line_from" not in data:
            raise InvalidJsonFormatError("Submitted json doesn't contain the required fields")

        for key in ("line_from", "line_to"):
            value = data.get(key)
            # bool is a subclass of int, so JSON true/false would otherwise be
            # silently accepted as line numbers 1/0.
            if isinstance(value, bool) or not isinstance(value, (int, type(None))):
                raise InvalidJsonFormatError("Submitted json doesn't contain the required fields")

        return cls(
            path=data["path"],
            line_from=data["line_from"],
            line_to=data.get("line_to"),
            body=data["body"],
        )
@dataclass
class DatasetEntry:
metadata: Metadata

View File

@ -1,9 +1,10 @@
import json
import sys
from typing_extensions import Callable
from utils.build_handlers import get_build_handler
from .paths import get_project_path
from sacrebleu import sentence_bleu as bleu
from utils.dataset import ArchiveState, Dataset
from utils.dataset import ArchiveState, Comment, CommentGenSubmission, Dataset
REFERENCE_MAP = Dataset.from_json(
str(get_project_path('../data/dataset.json'))
@ -12,32 +13,78 @@ REFERENCE_MAP = Dataset.from_json(
ARCHIVES_ROOT = str(get_project_path('../data/archives'))
def comment_distance(submission: CommentGenSubmission, entry: Comment):
    """Return the line gap between a submitted range and a reference range.

    The submission contributes (`line_from`, `line_to`) and the reference
    comment contributes (`from_`, `to`). Either endpoint of a pair may be
    None; a pair with a single endpoint is treated as a one-line range, and
    reversed pairs are put in ascending order.

    Returns:
        "NA" if either side has no endpoint at all, 0 if the two ranges
        share at least one line, otherwise the positive number of lines
        between the closest endpoints.
    """
    reference_bounds = (entry.from_, entry.to)
    submitted_bounds = (submission.line_from, submission.line_to)

    # Without any endpoint on one side there is nothing to measure.
    if all(bound is None for bound in reference_bounds):
        return "NA"
    if all(bound is None for bound in submitted_bounds):
        return "NA"

    def _span(first, second):
        # Fill a missing endpoint with the defined one, then order the pair.
        low = second if first is None else first
        high = first if second is None else second
        return (high, low) if low > high else (low, high)

    ref_low, ref_high = _span(*reference_bounds)
    sub_low, sub_high = _span(*submitted_bounds)

    # At most one of the two differences is positive; both are <= 0 exactly
    # when the ranges overlap, in which case the distance is 0.
    return max(0, sub_low - ref_high, ref_low - sub_high)
def evaluate_comments(
    answers: dict[str, CommentGenSubmission],
    percent_cb: Callable[[float], None] = lambda _: None,
    complete_cb: Callable[[dict], None] = lambda _: None,
):
    """Score submitted comments against the reference dataset.

    For every submission whose id is present in `REFERENCE_MAP`, the body is
    compared by sentence-level BLEU with the first reference comment and each
    of its paraphrases. The submitted path is compared with the reference
    comment's file, and when they match the line distance is computed with
    `comment_distance`.

    Args:
        answers: Mapping from dataset entry id to the submitted comment.
        percent_cb: Called after each processed submission with the integer
            percentage of submissions handled so far. It is not called for
            ids that are skipped.
        complete_cb: Called once with the full results mapping.

    Returns:
        A dict mapping each evaluated id to a dict with the keys
        'max_bleu_score', 'bleu_scores', 'proposed_comment', 'correct_file'
        and 'distance'.
    """
    total = len(answers)
    results = {}

    for i, (id_, submission) in enumerate(answers.items(), 1):
        if id_ not in REFERENCE_MAP:
            # Interpolate the loop variable `id_`, not the builtin `id`.
            print(f"[WARNING] skipping {id_} since it is not present in dataset", file=sys.stderr)
            continue

        entry = REFERENCE_MAP[id_]
        reference = entry.comments[0]

        # Score against the reference body and every paraphrase of it.
        max_score = 0
        scores = []
        for p in [reference.body] + reference.paraphrases:
            score = round(bleu(submission.body, [p]).score, 2)
            scores.append(score)
            max_score = max(max_score, score)

        # Line distance is only meaningful when the comment is on the right file.
        correct_file = submission.path == reference.file
        if correct_file:
            distance = comment_distance(submission, reference)
        else:
            distance = "NA"

        results[id_] = {
            'max_bleu_score': max_score,
            'bleu_scores': scores,
            'proposed_comment': submission.__dict__,
            'correct_file': correct_file,
            'distance': distance,
        }
        percent_cb(int(i / total * 100))

    complete_cb(results)
    return results