From f939f396a6fa777d3bd9a5fbef75df18a849172a Mon Sep 17 00:00:00 2001
From: Yoav Katz <katz@il.ibm.com>
Date: Sun, 11 Jan 2026 12:59:23 +0200
Subject: [PATCH] Fix bug in parsing LLM-as-Judge results returned in
 MT-Bench style format [[ ]]. If the number inside was large, the parser
 would return a score > 1.0.

Signed-off-by: Yoav Katz <katz@il.ibm.com>
---
 src/unitxt/processors.py             | 5 ++++-
 tests/library/test_postprocessors.py | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/unitxt/processors.py b/src/unitxt/processors.py
index 6f13e10a33..95765e8911 100644
--- a/src/unitxt/processors.py
+++ b/src/unitxt/processors.py
@@ -315,7 +315,10 @@ class ExtractMtBenchRatingJudgment(FieldOperator):
     def process_value(self, text: Any) -> Any:
         match = re.search(r"\[\[([\s*\d]+\.?[\d]*\s*)(/\s*10)?\s*\]\]", text)
         try:
-            return float(match.group(1)) / 10
+            score = float(match.group(1)) / 10
+            if score > 1.0 or score < 0:
+                return 0.0
+            return score
         except:
             return 0.0
 
diff --git a/tests/library/test_postprocessors.py b/tests/library/test_postprocessors.py
index 65645bc06e..fb06758367 100644
--- a/tests/library/test_postprocessors.py
+++ b/tests/library/test_postprocessors.py
@@ -377,8 +377,9 @@ def test_extract_mt_bench_rating_judgment(self):
             "[[9]] because",
             "good",
             "bad [[x]]",
+            "[[1232]]",
         ]
-        targets = [0.3, 0.6, 0.62, 0.9, 0.0, 0.0]
+        targets = [0.3, 0.6, 0.62, 0.9, 0.0, 0.0, 0.0]
 
         check_operator(
             operator=postprocessor,