From f939f396a6fa777d3bd9a5fbef75df18a849172a Mon Sep 17 00:00:00 2001 From: Yoav Katz Date: Sun, 11 Jan 2026 12:59:23 +0200 Subject: [PATCH] Fix bug in parsing LLM-as-judge results returned in MT-Bench style format [[ ]]. If the number inside the brackets was greater than 10, the returned score would exceed 1.0. Signed-off-by: Yoav Katz --- src/unitxt/processors.py | 5 ++++- tests/library/test_postprocessors.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/unitxt/processors.py b/src/unitxt/processors.py index 6f13e10a33..95765e8911 100644 --- a/src/unitxt/processors.py +++ b/src/unitxt/processors.py @@ -315,7 +315,10 @@ class ExtractMtBenchRatingJudgment(FieldOperator): def process_value(self, text: Any) -> Any: match = re.search(r"\[\[([\s*\d]+\.?[\d]*\s*)(/\s*10)?\s*\]\]", text) try: - return float(match.group(1)) / 10 + score = float(match.group(1)) / 10 + if score > 1.0 or score < 0: + return 0.0 + return score except: return 0.0 diff --git a/tests/library/test_postprocessors.py b/tests/library/test_postprocessors.py index 65645bc06e..fb06758367 100644 --- a/tests/library/test_postprocessors.py +++ b/tests/library/test_postprocessors.py @@ -377,8 +377,9 @@ def test_extract_mt_bench_rating_judgment(self): "[[9]] because", "good", "bad [[x]]", + "[[1232]]", ] - targets = [0.3, 0.6, 0.62, 0.9, 0.0, 0.0] + targets = [0.3, 0.6, 0.62, 0.9, 0.0, 0.0, 0.0] check_operator( operator=postprocessor,