mirror of
https://github.com/karma-riuk/crab-webapp.git
synced 2025-07-04 22:08:12 +02:00
now comments get evaluated correctly, not just
based on bleu score but also on the file location and line number
This commit is contained in:
@ -108,6 +108,7 @@ table thead {
|
||||
|
||||
table thead th {
|
||||
padding: 8px;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
table thead th:hover {
|
||||
@ -146,8 +147,18 @@ table tbody td:nth-child(1) {
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
/* style score column values */
|
||||
/* style correct file column values */
|
||||
.results-container#comment table tbody td:nth-child(3) {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
/* style distance column values */
|
||||
.results-container#comment table tbody td:nth-child(4) {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
/* style score column values */
|
||||
.results-container#comment table tbody td:nth-child(5) {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
|
@ -92,6 +92,8 @@
|
||||
<tr>
|
||||
<th>id</th>
|
||||
<th>Proposed comment</th>
|
||||
<th>Correct file</th>
|
||||
<th>Distance</th>
|
||||
<th>Max bleu score</th>
|
||||
</tr>
|
||||
</thead>
|
||||
|
@ -37,14 +37,18 @@ function populateCommentTable(results) {
|
||||
const row = tbody.insertRow();
|
||||
const idCell = row.insertCell();
|
||||
const commentCell = row.insertCell();
|
||||
const pathCell = row.insertCell();
|
||||
const distanceCell = row.insertCell();
|
||||
const scoreCell = row.insertCell();
|
||||
const span = document.createElement("span");
|
||||
|
||||
idCell.textContent = id;
|
||||
span.className = "comment-cell";
|
||||
span.textContent = info["proposed_comment"];
|
||||
span.textContent = info["proposed_comment"].body;
|
||||
commentCell.appendChild(span);
|
||||
scoreCell.textContent = info["max_bleu_score"].toFixed(2);
|
||||
pathCell.textContent = info["correct_file"] ? tick : cross;
|
||||
distanceCell.textContent = info["distance"];
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
# routes/answers.py
|
||||
from typing import Callable
|
||||
from flask import Blueprint, request, jsonify, current_app, url_for
|
||||
from utils.dataset import CommentGenSubmission
|
||||
from utils.errors import InvalidJsonFormatError
|
||||
from utils.process_data import evaluate_comments, evaluate_refinement
|
||||
from utils.observer import SocketObserver, Status, Subject
|
||||
@ -14,16 +15,22 @@ router = Blueprint('answers', __name__, url_prefix='/answers')
|
||||
ALLOWED_EXT = {'json'}
|
||||
|
||||
|
||||
def validate_json_format_for_comment_gen(data: str) -> dict[str, str]:
|
||||
def validate_json_format_for_comment_gen(data: str) -> dict[str, CommentGenSubmission]:
|
||||
try:
|
||||
obj = json.loads(data)
|
||||
ret = {}
|
||||
if not isinstance(obj, dict):
|
||||
raise InvalidJsonFormatError("Submitted json doesn't contain an object")
|
||||
if not all(isinstance(v, str) for v in obj.values()):
|
||||
raise InvalidJsonFormatError(
|
||||
"Submitted json object must only be str -> str. Namely id -> comment"
|
||||
)
|
||||
return obj
|
||||
|
||||
for id, submission in obj.items():
|
||||
if not isinstance(id, str):
|
||||
raise InvalidJsonFormatError("The id of a particular submission must be a string")
|
||||
if not isinstance(submission, dict):
|
||||
raise InvalidJsonFormatError(
|
||||
"A particular submission must be a dictionary of type {'path' -> str, 'line_from' -> int, 'line_to' -> int, 'body' -> str}"
|
||||
)
|
||||
ret[id] = CommentGenSubmission.json_parse(submission)
|
||||
return ret
|
||||
except InvalidJsonFormatError as e:
|
||||
raise e
|
||||
except Exception:
|
||||
|
@ -3,6 +3,8 @@ from enum import Enum
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
import json, uuid
|
||||
|
||||
from utils.errors import InvalidJsonFormatError
|
||||
|
||||
# fmt: off
|
||||
@dataclass
|
||||
class FileData:
|
||||
@ -48,6 +50,29 @@ class Metadata:
|
||||
return f"{self.id}_{state.value}.tar.gz"
|
||||
return f"{self.repo.replace('/', '_')}_{self.pr_number}_{state.value}.tar.gz"
|
||||
|
||||
@dataclass
class CommentGenSubmission:
    """A submitted comment together with the location it is attached to.

    Instances are normally built from decoded JSON via :meth:`json_parse`,
    which validates the shape of the payload first.
    """

    # Path of the file the comment is attached to.
    path: str
    # First line of the commented range; validation allows it to be null.
    line_from: Optional[int]
    # Last line of the commented range; validation allows it to be null.
    line_to: Optional[int]
    # Text of the comment itself.
    body: str

    @classmethod
    def json_parse(cls, data) -> "CommentGenSubmission":
        """Validate a decoded JSON object and build a submission from it.

        Args:
            data: The decoded JSON value for one submission. It must be a
                dict with string values for ``"path"`` and ``"body"`` and
                int-or-null values for ``"line_from"`` and ``"line_to"``
                (both keys must be present).

        Returns:
            The corresponding ``CommentGenSubmission``.

        Raises:
            InvalidJsonFormatError: If ``data`` is not a dict, or a required
                key is missing or holds a value of the wrong type.
        """
        if not isinstance(data, dict):
            raise InvalidJsonFormatError("Submitted json doesn't contain an object")

        # A missing key yields None from .get(), which is not a str either.
        if not all(isinstance(data.get(key), str) for key in ("path", "body")):
            raise InvalidJsonFormatError("Submitted json doesn't contain the required fields")

        for key in ("line_from", "line_to"):
            if key not in data:
                raise InvalidJsonFormatError("Submitted json doesn't contain the required fields")
            value = data[key]
            # bool is a subclass of int, so JSON true/false would otherwise
            # slip through as the line numbers 1/0.
            if isinstance(value, bool) or not isinstance(value, (int, type(None))):
                raise InvalidJsonFormatError("Submitted json doesn't contain the required fields")

        return cls(
            path=data["path"],
            line_from=data["line_from"],
            line_to=data.get("line_to"),
            body=data["body"],
        )
|
||||
|
||||
@dataclass
|
||||
class DatasetEntry:
|
||||
metadata: Metadata
|
||||
|
@ -1,9 +1,10 @@
|
||||
import json
|
||||
import sys
|
||||
from typing_extensions import Callable
|
||||
from utils.build_handlers import get_build_handler
|
||||
from .paths import get_project_path
|
||||
from sacrebleu import sentence_bleu as bleu
|
||||
from utils.dataset import ArchiveState, Dataset
|
||||
from utils.dataset import ArchiveState, Comment, CommentGenSubmission, Dataset
|
||||
|
||||
REFERENCE_MAP = Dataset.from_json(
|
||||
str(get_project_path('../data/dataset.json'))
|
||||
@ -12,32 +13,78 @@ REFERENCE_MAP = Dataset.from_json(
|
||||
ARCHIVES_ROOT = str(get_project_path('../data/archives'))
|
||||
|
||||
|
||||
def comment_distance(submission: CommentGenSubmission, entry: Comment):
    """Measure how far apart the submitted and reference line ranges are.

    Each side describes a range through two optional endpoints
    (``line_from``/``line_to`` on the submission, ``from_``/``to`` on the
    entry). A side with a single endpoint is treated as a one-line range,
    and endpoints given in reverse order are swapped.

    Returns:
        ``"NA"`` if either side has no endpoint at all, ``0`` if the two
        ranges overlap, otherwise the positive difference between the
        nearest endpoints of the two ranges.
    """
    reference_bounds = [b for b in (entry.from_, entry.to) if b is not None]
    if not reference_bounds:
        return "NA"

    submitted_bounds = [
        b for b in (submission.line_from, submission.line_to) if b is not None
    ]
    if not submitted_bounds:
        return "NA"

    # min/max both collapse a lone endpoint onto itself and normalise
    # ranges whose endpoints were supplied in reverse order.
    ref_low, ref_high = min(reference_bounds), max(reference_bounds)
    sub_low, sub_high = min(submitted_bounds), max(submitted_bounds)

    if ref_high < sub_low:
        # Reference range lies entirely before the submitted one.
        return sub_low - ref_high
    if sub_high < ref_low:
        # Submitted range lies entirely before the reference one.
        return ref_low - sub_high

    # Neither range ends before the other starts: they overlap.
    return 0
|
||||
|
||||
|
||||
def evaluate_comments(
    answers: dict[str, CommentGenSubmission],
    percent_cb: Callable[[float], None] = lambda _: None,
    complete_cb: Callable[[dict], None] = lambda _: None,
):
    """Score submitted comments against the reference dataset.

    For every submission whose id is present in ``REFERENCE_MAP``, the
    comment body is compared (sentence-level BLEU) with the body and the
    paraphrases of the entry's first reference comment, and its location is
    compared with that reference's file and line range.

    Args:
        answers: Mapping from dataset id to the submitted comment.
        percent_cb: Called after each evaluated submission with the integer
            percentage of submissions handled so far. It is not called for
            skipped ids.
        complete_cb: Called once at the end with the full results mapping.

    Returns:
        A dict keyed by id. Each value holds ``max_bleu_score``,
        ``bleu_scores``, ``proposed_comment`` (the submission's fields as a
        dict), ``correct_file`` and ``distance``. ``distance`` is ``"NA"``
        when the file does not match. Ids absent from the dataset are
        skipped with a warning on stderr.
    """
    total = len(answers)
    results = {}
    for i, (id_, submission) in enumerate(answers.items(), 1):
        if id_ not in REFERENCE_MAP:
            # Interpolate the loop variable `id_`; the bare name `id` is the
            # builtin function and would print its repr instead of the key.
            print(f"[WARNING] skipping {id_} since it is not present in dataset", file=sys.stderr)
            continue

        # Only the first reference comment of the entry is used for scoring.
        reference = REFERENCE_MAP[id_].comments[0]

        scores = [
            round(bleu(submission.body, [candidate]).score, 2)
            for candidate in [reference.body] + reference.paraphrases
        ]
        # Seed with 0 so the maximum never drops below zero.
        max_score = max(0, *scores)

        correct_file = submission.path == reference.file
        # A line distance is only meaningful when the comment is on the right file.
        distance = comment_distance(submission, reference) if correct_file else "NA"

        results[id_] = {
            'max_bleu_score': max_score,
            'bleu_scores': scores,
            'proposed_comment': submission.__dict__,
            'correct_file': correct_file,
            'distance': distance,
        }
        percent_cb(int(i / total * 100))

    complete_cb(results)
    return results
|
||||
|
||||
|
Reference in New Issue
Block a user