From 295ab07d6aceb75b34fdf72ea6ebe83eac4bde38 Mon Sep 17 00:00:00 2001 From: Bo Li Date: Thu, 23 Apr 2026 15:05:35 +0800 Subject: [PATCH] fix(api/task): guard empty results list in process_results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For generate_until / generate_visual_cot tasks, when a sample's generation fails upstream (model raised, retries exhausted, etc.), the results list can be empty. The existing check then evaluates `isinstance(results[0], list)` on that empty list and raises IndexError — which aborts the full postprocess loop for that task, not just the missing sample. Add a leading `results and` so an empty list short-circuits to the else branch, where `[res.strip() for res in results]` yields an empty list — the downstream task-level process_results then receives an empty list and can decide how to score the missing sample without taking down the whole task. --- lmms_eval/api/task.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lmms_eval/api/task.py b/lmms_eval/api/task.py index ddf817485..f52295a3d 100755 --- a/lmms_eval/api/task.py +++ b/lmms_eval/api/task.py @@ -1576,7 +1576,10 @@ def construct_requests(self, doc_id: int, ctx: str, **kwargs) -> Union[List[Inst @retry(stop=(stop_after_attempt(5) | stop_after_delay(1200)), wait=wait_fixed(2)) def process_results(self, doc, results, full_docs=None): if self.OUTPUT_TYPE in ("generate_until", "generate_visual_cot"): - if isinstance(results, list) and isinstance(results[0], list): + # Guard empty results so results[0] below does not IndexError for + # samples whose generation failed. Downstream process_results then + # receives an empty list and can decide how to score the miss. + if results and isinstance(results, list) and isinstance(results[0], list): results = [res.strip() for res in results[0]] else: results = [res.strip() for res in results]