From 686500fc2ef9cdb077d5f01c62ec4162835bd431 Mon Sep 17 00:00:00 2001 From: Karma Riuk Date: Sun, 11 May 2025 09:34:56 +0200 Subject: [PATCH] max_n for the BLEU score is now the minimum of the candidate's token count, the reference's token count, and 4 --- src/utils/process_data.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/utils/process_data.js b/src/utils/process_data.js index 641ab46..76e0330 100644 --- a/src/utils/process_data.js +++ b/src/utils/process_data.js @@ -20,6 +20,7 @@ export const evaluate_comments = (answers, percent_cb) => { let i = 0; const results = {}; for (const [id, generated_comment] of Object.entries(answers)) { + const n_tokens_generated = generated_comment.trim().split(/\s+/).length; // console.log(`Processing ${i} ${id}...`); if (!(id in REFERENCE_MAP)) { // throw new Error(`id: "${id}" is not present in the dataset`); @@ -31,7 +32,9 @@ export const evaluate_comments = (answers, percent_cb) => { let maxScore = 0; const scores = []; for (const paraphrase of paraphrases) { - const score = bleu(paraphrase, generated_comment, 4); // TODO: ask prof what number show be here + const n_tokens_paraphrase = paraphrase.trim().split(/\s+/).length; + const max_n = Math.min(n_tokens_generated, n_tokens_paraphrase, 4); + const score = bleu(paraphrase, generated_comment, max_n); scores.push(score); maxScore = Math.max(score, maxScore); }