From 207d839cd4170879f385c18f1ba8b55670ae6b4d Mon Sep 17 00:00:00 2001
From: Leander Maben <leander.maben@gmail.com>
Date: Thu, 1 Jan 2026 18:46:03 -0500
Subject: [PATCH 1/3] Add Custom Finish Tool

---
 configs/skyrl-experiments/read-only.yaml |   1 +
 configs/skyrl-experiments/terminal.yaml  |   1 +
 src/prompts/templates/system_prompt.j2   |  75 +++--
 src/tools/localization_finish.py         | 361 +++++++++++++++++++++++
 tests/tools/test_localization_finish.py  | 263 +++++++++++++++++
 5 files changed, 681 insertions(+), 20 deletions(-)
 create mode 100644 src/tools/localization_finish.py
 create mode 100644 tests/tools/test_localization_finish.py

diff --git a/configs/skyrl-experiments/read-only.yaml b/configs/skyrl-experiments/read-only.yaml
index 4d5525a..4f48460 100644
--- a/configs/skyrl-experiments/read-only.yaml
+++ b/configs/skyrl-experiments/read-only.yaml
@@ -9,6 +9,7 @@ tools:
   - glob
   - grep
   - terminal
+  - localization_finish
 
 prompts:
   system_prompt: "templates/system_prompt.j2"
diff --git a/configs/skyrl-experiments/terminal.yaml b/configs/skyrl-experiments/terminal.yaml
index 6ad62c6..613ad3f 100644
--- a/configs/skyrl-experiments/terminal.yaml
+++ b/configs/skyrl-experiments/terminal.yaml
@@ -7,6 +7,7 @@ reward:
 
 tools:
   - terminal
+  - localization_finish
 
 prompts:
   system_prompt: "templates/system_prompt.j2"
diff --git a/src/prompts/templates/system_prompt.j2 b/src/prompts/templates/system_prompt.j2
index f5a67aa..462f7ba 100644
--- a/src/prompts/templates/system_prompt.j2
+++ b/src/prompts/templates/system_prompt.j2
@@ -36,12 +36,40 @@ You are given access to the codebase in a linux file system.
   * Read targeted line ranges around matches using `sed -n 'START,ENDp'`
   * Only read additional chunks if the initial sections are relevant
 
-### Final Answer Format (REQUIRED)
-- You MUST return your final answer in backticks ``` ... ```
-- Format: ```\nfull_path1/file1.py\nclass: MyClass1\nfunction: my_function1\n\nfull_path2/file2.py\nfunction: MyClass2.my_function2\n\nfull_path3/file3.py\nfunction: my_function3\n```
-- List one file path per line
-- Use relative paths as they appear in the repository
-- DO NOT include any other text inside the backticks
+### Submitting Your Answer (REQUIRED)
+
+When you have identified all relevant locations, use the `finish` tool to submit your results.
+
+**Format Requirements:**
+- Wrap your answer in triple backticks ``` ... ```
+- Each location must start with a file path (ending in .py)
+- Class name is OPTIONAL - include only if the change is within a specific class
+- Function name is OPTIONAL - include only if the change is at function/method level
+
+**When to include what:**
+- File-level changes (imports, globals, new classes): List just the file
+- Class-level changes (new methods, attributes): List file + class (no function)
+- Standalone function: List file + function (no class)
+- Method in a class: List file + class + function
+
+**Example formats:**
+```
+# File-only (imports, globals, new class added)
+path/to/file1.py
+
+# File + Class (class-level changes)
+path/to/file2.py
+class: MyClass
+
+# File + Function (standalone function)
+path/to/file3.py
+function: my_function
+
+# File + Class + Function (method change)
+path/to/file4.py
+class: MyClass
+function: my_method
+```
 
 ## SEARCH STRATEGY
 
@@ -61,32 +89,39 @@ You are given access to the codebase in a linux file system.
 3. **Final Verification**: Confirm your file list
    - Verify each candidate file is truly relevant
    - Ensure you haven't missed related files
-   - Return your answer in backticks ``` ... ```
+   - Use the `finish` tool to submit your answer
 
 ## CRITICAL RULES
 - NEVER exceed 5 parallel bash tool calls in a single turn
-- NEVER respond without wrapping your file list in backticks ```
+- ALWAYS use the `finish` tool when you're done (wrap your answer in backticks)
 - ALWAYS use bash tool to search (do not guess file locations)
 - NEVER read entire large files - always read in chunks (100-line ranges)
 - Check file size with `wc -l` before reading
 - Read file contents in chunks to verify relevance before including them
 - Return file paths as they appear in the repository. Do not begin the path with "./"
 - Aim for high precision (all files relevant) and high recall (no relevant files missed)
+- Class and function names are OPTIONAL - only include when changes are at that level
 
-## EXAMPLE OUTPUT
+## EXAMPLE SUBMISSION
 
-After exploring the codebase, return your answer like this:
+When ready, call the `finish` tool with your findings:
 
-Your final output should list the locations requiring modification, wrapped with triple backticks ```
-Each location should include the file path, class name (if applicable), and function name. Here is an example Output:
 ```
-full_path1/file1.py
-class: MyClass1
-function: my_function1
+src/utils/parser.py
+class: DataParser
+function: parse_json
 
-full_path2/file2.py
-function: MyClass2.my_function2
+src/models/user.py
+class: User
+
+src/config.py
+
+src/api/endpoints.py
+function: handle_request
+```
 
-full_path3/file3.py
-function: my_function3
-```
\ No newline at end of file
+**Note:** In this example:
+- `parser.py` has a specific method change
+- `user.py` has a class-level change (no specific function)
+- `config.py` has file-level changes (imports/globals)
+- `endpoints.py` has a standalone function change
\ No newline at end of file
diff --git a/src/tools/localization_finish.py b/src/tools/localization_finish.py
new file mode 100644
index 0000000..a762904
--- /dev/null
+++ b/src/tools/localization_finish.py
@@ -0,0 +1,361 @@
+"""Custom finish tool for code localization tasks.
+
+This tool allows the agent to submit localization results in a flexible format where:
+- File path is required
+- Class name is optional
+- Function name is optional
+"""
+
+import os
+from typing import TYPE_CHECKING
+from collections.abc import Sequence
+
+from pydantic import Field
+from rich.text import Text
+
+from openhands.sdk import (
+    Action,
+    Observation,
+    ToolDefinition,
+)
+from openhands.sdk.tool import ToolExecutor
+
+from src.tools import tool
+
+if TYPE_CHECKING:
+    from openhands.sdk.conversation.base import BaseConversation
+
+
+class LocalizationFinishAction(Action):
+    """Action for submitting final localization results."""
+
+    message: str = Field(
+        description="""Your final localization results in the specified format.
+
+Format:
+```
+path/to/file1.py
+class: ClassName
+function: method_name
+
+path/to/file2.py
+function: standalone_function
+
+path/to/file3.py
+```
+
+Requirements:
+- Each location must have a file path
+- Class name is optional (omit if change is file-level or function is standalone)
+- Function name is optional (omit if change is file-level or class-level only)
+- Wrap your answer in triple backticks
+
+Example for different scenarios:
+- File-level change (imports, globals): Just list the file
+- Class-level change (new method, attributes): List file + class
+- Method change: List file + class + function
+- Standalone function: List file + function
+"""
+    )
+
+    @property
+    def visualize(self) -> Text:
+        """Return Rich Text representation of this action."""
+        content = Text()
+        content.append("Submitting localization results:\n", style="bold blue")
+        content.append(self.message)
+        return content
+
+
+class LocalizationFinishObservation(Observation):
+    """Observation returned after submitting localization results."""
+
+    success: bool = Field(default=True, description="Whether submission was successful")
+    num_locations: int = Field(default=0, description="Number of locations submitted")
+    validation_message: str = Field(default="", description="Validation feedback")
+    details: dict = Field(default_factory=dict, description="Additional details")
+
+    @property
+    def visualize(self) -> Text:
+        """Return Rich Text representation of this observation."""
+        content = Text()
+        if self.success:
+            content.append(f"✓ Successfully submitted {self.num_locations} location(s)\n", style="bold green")
+        else:
+            content.append(f"✗ Submission failed\n", style="bold red")
+            content.append(f"{self.validation_message}\n", style="yellow")
+        return content
+
+
+def parse_localization_output(raw_output: str) -> list[dict]:
+    """Parse localization output with optional class and function.
+
+    This is an enhanced version of parse_simple_output that handles:
+    - File-only entries (no class or function)
+    - File + class entries (no function)
+    - File + function entries (no class)
+    - File + class + function entries
+
+    Args:
+        raw_output: Raw text output to parse
+
+    Returns:
+        List of dictionaries with 'file', 'class' (optional), 'function' (optional)
+    """
+    # Remove triple backticks and whitespace
+    raw_output = raw_output.strip("` \n")
+
+    locations = []
+    current_file = None
+    current_class = None
+    current_function = None
+
+    lines = raw_output.strip().split("\n")
+
+    for line in lines:
+        line = line.strip()
+
+        if not line:
+            # Empty line - save current location if we have a file
+            if current_file:
+                locations.append({
+                    "file": current_file,
+                    "class": current_class,
+                    "function": current_function,
+                })
+                current_file = None
+                current_class = None
+                current_function = None
+            continue
+
+        # Check if this is a file path (ends with .py)
+        if line.endswith(".py"):
+            # Save previous location if exists
+            if current_file:
+                locations.append({
+                    "file": current_file,
+                    "class": current_class,
+                    "function": current_function,
+                })
+            # Start new location
+            current_file = line
+            current_class = None
+            current_function = None
+            continue
+
+        # Parse class declaration
+        if line.startswith("class:"):
+            class_name = line[len("class:"):].strip()
+            current_class = class_name
+            continue
+
+        # Parse function/method declaration
+        if line.startswith("function:") or line.startswith("method:"):
+            func_text = line.split(":", 1)[1].strip()
+            func_name = func_text.split()[0].strip("() ")
+
+            # Check if function includes class prefix (e.g., "MyClass.my_method")
+            if "." in func_name:
+                parts = func_name.split(".", 1)
+                current_class = parts[0]
+                current_function = parts[1]
+            else:
+                current_function = func_name
+            continue
+
+    # Don't forget the last location
+    if current_file:
+        locations.append({
+            "file": current_file,
+            "class": current_class,
+            "function": current_function,
+        })
+
+    return locations
+
+
+class LocalizationFinishExecutor(ToolExecutor):
+    """Executor for localization finish tool with validation."""
+
+    def __init__(self, workspace_dir: str | None = None):
+        """Initialize the executor.
+
+        Args:
+            workspace_dir: Optional workspace directory to validate file existence.
+        """
+        self.workspace_dir = workspace_dir
+
+    def __call__(
+        self,
+        action: LocalizationFinishAction,
+        conversation: "BaseConversation | None" = None,
+    ) -> LocalizationFinishObservation:
+        """Execute the finish action with validation.
+
+        Args:
+            action: The localization finish action to execute
+            conversation: Optional conversation context
+
+        Returns:
+            LocalizationFinishObservation with validation results
+        """
+
+        try:
+            # Parse the output to validate format
+            locations = parse_localization_output(action.message)
+            num_locs = len(locations)
+
+            # Validation 1: Check if any locations were found
+            if num_locs == 0:
+                return LocalizationFinishObservation(
+                    success=False,
+                    num_locations=0,
+                    validation_message=(
+                        "No valid locations found. Please provide at least one file path "
+                        "in the correct format wrapped in triple backticks."
+                    ),
+                    details={"error": "empty_output"}
+                )
+
+            # Validation 2: Check each location has a file path
+            errors = []
+            for i, loc in enumerate(locations):
+                if not loc.get('file'):
+                    errors.append(f"Location {i+1} is missing a file path")
+
+            if errors:
+                return LocalizationFinishObservation(
+                    success=False,
+                    num_locations=0,
+                    validation_message="\n".join(errors),
+                    details={"error": "missing_file_paths", "locations": locations}
+                )
+
+            # Validation 3: Check file existence (if workspace provided)
+            if self.workspace_dir:
+                missing_files = []
+                for loc in locations:
+                    file_path = loc['file']
+                    full_path = os.path.join(self.workspace_dir, file_path)
+                    if not os.path.exists(full_path):
+                        missing_files.append(file_path)
+
+                if missing_files:
+                    return LocalizationFinishObservation(
+                        success=False,
+                        num_locations=num_locs,
+                        validation_message=(
+                            f"Warning: {len(missing_files)} file(s) not found in workspace:\n" +
+                            "\n".join(f"  - {f}" for f in missing_files[:5]) +
+                            (f"\n  ... and {len(missing_files) - 5} more" if len(missing_files) > 5 else "")
+                        ),
+                        details={
+                            "warning": "files_not_found",
+                            "missing_files": missing_files,
+                            "locations": locations
+                        }
+                    )
+
+            # Success!
+            return LocalizationFinishObservation(
+                success=True,
+                num_locations=num_locs,
+                validation_message=f"Successfully submitted {num_locs} location(s).",
+                details={"locations": locations}
+            )
+
+        except Exception as e:
+            # Parsing failed
+            return LocalizationFinishObservation(
+                success=False,
+                num_locations=0,
+                validation_message=(
+                    f"Error parsing output: {str(e)}\n\n"
+                    "Please ensure your output follows the correct format:\n"
+                    "```\n"
+                    "path/to/file.py\n"
+                    "class: ClassName\n"
+                    "function: method_name\n"
+                    "```"
+                ),
+                details={"error": "parse_error", "exception": str(e)}
+            )
+
+
+TOOL_DESCRIPTION = """Submit your final code localization results.
+
+Use this tool when you have identified all relevant files, classes, and functions
+that need to be modified to address the issue described in the problem statement.
+
+Format your results as follows:
+```
+path/to/file1.py
+class: ClassName
+function: method_name
+
+path/to/file2.py
+function: standalone_function
+
+path/to/file3.py
+```
+
+Requirements:
+- Wrap your output in triple backticks (```)
+- Each location must start with a file path
+- Class name is OPTIONAL - include only if the change is within a specific class
+- Function name is OPTIONAL - include only if the change is at function/method level
+
+When to omit class/function:
+- File-level only (imports, globals, new classes): List just the file
+- Class-level only (new methods, attributes): List file + class (no function)
+- Standalone function: List file + function (no class)
+- Method in class: List file + class + function
+
+The tool will validate your submission and provide feedback if the format is incorrect.
+"""
+
+
+class LocalizationFinishTool(ToolDefinition[LocalizationFinishAction, LocalizationFinishObservation]):
+    """Tool for submitting final code localization results."""
+
+    @classmethod
+    def create(
+        cls,
+        conv_state,
+        workspace_dir: str | None = None,
+        **params
+    ) -> Sequence["LocalizationFinishTool"]:
+        """Create LocalizationFinishTool instance.
+
+        Args:
+            conv_state: Conversation state (provides workspace info)
+            workspace_dir: Optional workspace directory override
+            **params: Additional parameters
+
+        Returns:
+            A sequence containing a single LocalizationFinishTool instance.
+        """
+        # Get workspace from conv_state if not provided
+        if workspace_dir is None and hasattr(conv_state, 'workspace'):
+            workspace_dir = str(conv_state.workspace.working_dir)
+
+        executor = LocalizationFinishExecutor(workspace_dir=workspace_dir)
+
+        return [
+            cls(
+                action_type=LocalizationFinishAction,
+                observation_type=LocalizationFinishObservation,
+                description=TOOL_DESCRIPTION,
+                executor=executor,
+            )
+        ]
+
+
+@tool(name="finish")
+def _make_localization_finish_tool(conv_state) -> list[ToolDefinition]:
+    """Create localization finish tool.
+
+    This replaces the default finish tool with a localization-specific version
+    that validates the output format.
+    """
+    return LocalizationFinishTool.create(conv_state)
diff --git a/tests/tools/test_localization_finish.py b/tests/tools/test_localization_finish.py
new file mode 100644
index 0000000..2e348e9
--- /dev/null
+++ b/tests/tools/test_localization_finish.py
@@ -0,0 +1,263 @@
+"""Tests for the localization finish tool."""
+
+import os
+import tempfile
+import pytest
+from pathlib import Path
+
+from src.tools.localization_finish import (
+    LocalizationFinishAction,
+    LocalizationFinishExecutor,
+    LocalizationFinishObservation,
+)
+
+
+class TestLocalizationFinishExecutor:
+    """Tests for LocalizationFinishExecutor."""
+
+    def setup_method(self):
+        """Create executor and temp workspace for each test."""
+        self.temp_dir = tempfile.mkdtemp()
+        self.executor = LocalizationFinishExecutor(workspace_dir=self.temp_dir)
+
+    def teardown_method(self):
+        """Clean up temp workspace."""
+        import shutil
+        if os.path.exists(self.temp_dir):
+            shutil.rmtree(self.temp_dir)
+
+    def test_empty_message(self):
+        """Test with empty message."""
+        action = LocalizationFinishAction(message="")
+        result = self.executor(action)
+
+        assert result.success is False
+        assert result.num_locations == 0
+        assert "No valid locations" in result.validation_message
+
+    def test_malformed_input(self):
+        """Test with malformed input."""
+        action = LocalizationFinishAction(message="random text without proper format")
+        result = self.executor(action)
+
+        assert result.success is False
+        # Malformed input results in no locations found
+        assert "No valid locations" in result.validation_message
+
+    def test_file_only_valid(self):
+        """Test valid file-level localization (no class or function)."""
+        # Create the file in workspace
+        file_path = "test_file.py"
+        Path(self.temp_dir, file_path).touch()
+
+        action = LocalizationFinishAction(message=f"""```
+{file_path}
+```""")
+        result = self.executor(action)
+
+        assert result.success is True
+        assert result.num_locations == 1
+        assert "Successfully submitted" in result.validation_message
+
+    def test_file_and_class_valid(self):
+        """Test valid class-level localization (no function)."""
+        file_path = "test_file.py"
+        Path(self.temp_dir, file_path).touch()
+
+        action = LocalizationFinishAction(message=f"""```
+{file_path}
+class: MyClass
+```""")
+        result = self.executor(action)
+
+        assert result.success is True
+        assert result.num_locations == 1
+
+    def test_file_and_function_valid(self):
+        """Test valid function-level localization (no class)."""
+        file_path = "test_file.py"
+        Path(self.temp_dir, file_path).touch()
+
+        action = LocalizationFinishAction(message=f"""```
+{file_path}
+function: my_function
+```""")
+        result = self.executor(action)
+
+        assert result.success is True
+        assert result.num_locations == 1
+
+    def test_complete_localization_valid(self):
+        """Test complete localization with file, class, and function."""
+        file_path = "test_file.py"
+        Path(self.temp_dir, file_path).touch()
+
+        action = LocalizationFinishAction(message=f"""```
+{file_path}
+class: MyClass
+function: my_method
+```""")
+        result = self.executor(action)
+
+        assert result.success is True
+        assert result.num_locations == 1
+
+    def test_multiple_locations_mixed_formats(self):
+        """Test multiple locations with different formats."""
+        # Create files
+        for i in range(1, 5):
+            Path(self.temp_dir, f"file{i}.py").touch()
+
+        action = LocalizationFinishAction(message="""```
+file1.py
+
+file2.py
+class: ClassA
+
+file3.py
+function: func_b
+
+file4.py
+class: ClassC
+function: method_d
+```""")
+        result = self.executor(action)
+
+        assert result.success is True
+        assert result.num_locations == 4
+        assert "Successfully submitted 4 location(s)" in result.validation_message
+
+    def test_missing_file_warning(self):
+        """Test that missing files trigger a warning."""
+        action = LocalizationFinishAction(message="""```
+nonexistent_file.py
+```""")
+        result = self.executor(action)
+
+        # Should still parse but with warning
+        assert result.success is False
+        assert result.num_locations == 1
+        assert "not found in workspace" in result.validation_message
+        assert "nonexistent_file.py" in result.validation_message
+
+    def test_some_files_missing(self):
+        """Test when some files exist and some don't."""
+        # Create only one file
+        Path(self.temp_dir, "exists.py").touch()
+
+        action = LocalizationFinishAction(message="""```
+exists.py
+
+missing.py
+```""")
+        result = self.executor(action)
+
+        assert result.success is False
+        assert result.num_locations == 2
+        assert "missing.py" in result.validation_message
+
+    def test_no_backticks(self):
+        """Test that output without backticks fails parsing."""
+        action = LocalizationFinishAction(message="""file.py
+class: MyClass
+""")
+        result = self.executor(action)
+
+        # Should fail because parse_simple_output expects backticks
+        assert result.success is False
+
+    def test_executor_without_workspace(self):
+        """Test executor without workspace validation."""
+        executor = LocalizationFinishExecutor(workspace_dir=None)
+
+        action = LocalizationFinishAction(message="""```
+any_file.py
+```""")
+        result = executor(action)
+
+        # Should succeed without file existence check
+        assert result.success is True
+        assert result.num_locations == 1
+
+    def test_nested_file_path(self):
+        """Test with nested file paths."""
+        # Create nested structure
+        nested_dir = Path(self.temp_dir, "src", "utils")
+        nested_dir.mkdir(parents=True)
+        Path(nested_dir, "helper.py").touch()
+
+        action = LocalizationFinishAction(message="""```
+src/utils/helper.py
+function: process
+```""")
+        result = self.executor(action)
+
+        assert result.success is True
+        assert result.num_locations == 1
+
+    def test_dotted_function_name(self):
+        """Test with ClassName.method_name format."""
+        file_path = "test.py"
+        Path(self.temp_dir, file_path).touch()
+
+        action = LocalizationFinishAction(message=f"""```
+{file_path}
+function: MyClass.my_method
+```""")
+        result = self.executor(action)
+
+        assert result.success is True
+        assert result.num_locations == 1
+        # Should parse as class=MyClass, function=my_method
+        assert result.details["locations"][0]["class"] == "MyClass"
+        assert result.details["locations"][0]["function"] == "my_method"
+
+    def test_multiple_missing_files_truncated(self):
+        """Test that many missing files are truncated in message."""
+        action = LocalizationFinishAction(message="""```
+file1.py
+file2.py
+file3.py
+file4.py
+file5.py
+file6.py
+file7.py
+```""")
+        result = self.executor(action)
+
+        assert result.success is False
+        assert result.num_locations == 7
+        # Should show truncation message
+        assert "and 2 more" in result.validation_message
+
+
+class TestLocalizationFinishObservation:
+    """Tests for LocalizationFinishObservation visualization."""
+
+    def test_success_visualization(self):
+        """Test visualization of successful observation."""
+        obs = LocalizationFinishObservation(
+            success=True,
+            num_locations=3,
+            validation_message="Successfully submitted 3 location(s)."
+        )
+        text = obs.visualize
+
+        assert "✓" in str(text)
+        assert "3 location(s)" in str(text)
+
+    def test_failure_visualization(self):
+        """Test visualization of failed observation."""
+        obs = LocalizationFinishObservation(
+            success=False,
+            num_locations=0,
+            validation_message="Missing file paths"
+        )
+        text = obs.visualize
+
+        assert "✗" in str(text)
+        assert "Missing file paths" in str(text)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])

From 8b0d97a876b2438ece942f7c0610a94578c289cd Mon Sep 17 00:00:00 2001
From: Leander Maben <leander.maben@gmail.com>
Date: Fri, 2 Jan 2026 01:59:39 -0500
Subject: [PATCH 2/3] Take File Object arguments for the Finish Tool instead of
 String

---
 src/prompts/templates/system_prompt.j2        | 101 +++++---
 .../file_localization/module_rewards.py       |  30 ++-
 src/tools/localization_finish.py              | 243 +++++++-----------
 tests/tools/test_localization_finish.py       | 149 +++++------
 4 files changed, 246 insertions(+), 277 deletions(-)

diff --git a/src/prompts/templates/system_prompt.j2 b/src/prompts/templates/system_prompt.j2
index 462f7ba..a79f2d1 100644
--- a/src/prompts/templates/system_prompt.j2
+++ b/src/prompts/templates/system_prompt.j2
@@ -38,37 +38,49 @@ You are given access to the codebase in a linux file system.
 
 ### Submitting Your Answer (REQUIRED)
 
-When you have identified all relevant locations, use the `finish` tool to submit your results.
+When you have identified all relevant locations, use the `localization_finish` tool to submit your results.
 
 **Format Requirements:**
-- Wrap your answer in triple backticks ``` ... ```
-- Each location must start with a file path (ending in .py)
-- Class name is OPTIONAL - include only if the change is within a specific class
-- Function name is OPTIONAL - include only if the change is at function/method level
+Submit a structured list of locations. Each location is a JSON object with:
+- `file`: Path to the file (REQUIRED)
+- `class_name`: Class name (OPTIONAL - omit for file-level or standalone functions)
+- `function_name`: Function/method name (OPTIONAL - omit for file-level or class-level only)
 
 **When to include what:**
-- File-level changes (imports, globals, new classes): List just the file
-- Class-level changes (new methods, attributes): List file + class (no function)
-- Standalone function: List file + function (no class)
-- Method in a class: List file + class + function
+- File-level changes (imports, globals, new top-level classes): Just `file`
+- Class-level changes (new methods, attributes, entire class): `file` + `class_name`
+- Standalone function (top-level function): `file` + `function_name`
+- Method in a class: `file` + `class_name` + `function_name`
 
 **Example formats:**
+
+1. File-only (imports, globals, new class):
+```json
+{"file": "path/to/file1.py"}
 ```
-# File-only (imports, globals, new class added)
-path/to/file1.py
 
-# File + Class (class-level changes)
-path/to/file2.py
-class: MyClass
+2. File + Class (class-level changes):
+```json
+{"file": "path/to/file2.py", "class_name": "MyClass"}
+```
 
-# File + Function (standalone function)
-path/to/file3.py
-function: my_function
+3. File + Function (standalone function):
+```json
+{"file": "path/to/file3.py", "function_name": "my_function"}
+```
+
+4. File + Class + Function (method):
+```json
+{"file": "path/to/file4.py", "class_name": "MyClass", "function_name": "my_method"}
+```
 
-# File + Class + Function (method change)
-path/to/file4.py
-class: MyClass
-function: my_method
+5. Multiple locations:
+```json
+[
+  {"file": "src/parser.py", "class_name": "DataParser", "function_name": "parse_json"},
+  {"file": "src/models/user.py", "class_name": "User"},
+  {"file": "src/config.py"}
+]
 ```
 
 ## SEARCH STRATEGY
@@ -89,11 +101,11 @@ function: my_method
 3. **Final Verification**: Confirm your file list
    - Verify each candidate file is truly relevant
    - Ensure you haven't missed related files
-   - Use the `finish` tool to submit your answer
+   - Use the `localization_finish` tool to submit your answer
 
 ## CRITICAL RULES
 - NEVER exceed 5 parallel bash tool calls in a single turn
-- ALWAYS use the `finish` tool when you're done (wrap your answer in backticks)
+- ALWAYS use the `localization_finish` tool when you're done
 - ALWAYS use bash tool to search (do not guess file locations)
 - NEVER read entire large files - always read in chunks (100-line ranges)
 - Check file size with `wc -l` before reading
@@ -104,24 +116,31 @@ function: my_method
 
 ## EXAMPLE SUBMISSION
 
-When ready, call the `finish` tool with your findings:
-
-```
-src/utils/parser.py
-class: DataParser
-function: parse_json
-
-src/models/user.py
-class: User
-
-src/config.py
-
-src/api/endpoints.py
-function: handle_request
+When ready, call the `localization_finish` tool with your findings:
+
+```json
+[
+  {
+    "file": "src/utils/parser.py",
+    "class_name": "DataParser",
+    "function_name": "parse_json"
+  },
+  {
+    "file": "src/models/user.py",
+    "class_name": "User"
+  },
+  {
+    "file": "src/config.py"
+  },
+  {
+    "file": "src/api/endpoints.py",
+    "function_name": "handle_request"
+  }
+]
 ```
 
 **Note:** In this example:
-- `parser.py` has a specific method change
-- `user.py` has a class-level change (no specific function)
-- `config.py` has file-level changes (imports/globals)
-- `endpoints.py` has a standalone function change
\ No newline at end of file
+- `parser.py` has a specific method change (file + class + function)
+- `user.py` has a class-level change (file + class only)
+- `config.py` has file-level changes (file only)
+- `endpoints.py` has a standalone function change (file + function only)
\ No newline at end of file
diff --git a/src/rewards/file_localization/module_rewards.py b/src/rewards/file_localization/module_rewards.py
index 8be9d18..c0fd68c 100644
--- a/src/rewards/file_localization/module_rewards.py
+++ b/src/rewards/file_localization/module_rewards.py
@@ -1,17 +1,17 @@
 import logging
-from typing import Dict, List, Tuple
+from typing import Dict, List, Tuple, Union
 
-def parse_simple_output(raw_output: str) -> List[Dict[str, str]]:
+def parse_simple_output(raw_output: Union[str, List[Dict[str, str]]]) -> List[Dict[str, str]]:
     """
     Parse simplified agent output containing filename, optional class, and function.
 
     Args:
-        raw_output: Raw text output from the agent
+        raw_output: Either a raw text string OR a list of location dicts (for structured input)
 
     Returns:
         List of dictionaries with keys: 'file', 'class' (optional), 'function'
 
-    Example input format:
+    Example string input format:
         ```
         path/to/file1.py
         class: MyClass
@@ -21,12 +21,32 @@ def parse_simple_output(raw_output: str) -> List[Dict[str, str]]:
         function: standalone_function
         ```
 
-    Example output:
+    Example structured input format:
+        [
+            {'file': 'path/to/file1.py', 'class': 'MyClass', 'function': 'my_method'},
+            {'file': 'path/to/file2.py', 'class': None, 'function': 'standalone_function'}
+        ]
+
+    Example output (same for both):
         [
             {'file': 'path/to/file1.py', 'class': 'MyClass', 'function': 'my_method'},
             {'file': 'path/to/file2.py', 'class': None, 'function': 'standalone_function'}
         ]
     """
+    # Handle structured input (list of dicts)
+    if isinstance(raw_output, list):
+        # Already in the correct format (or close to it)
+        # Normalize field names: class_name -> class, function_name -> function
+        normalized = []
+        for loc in raw_output:
+            normalized.append({
+                'file': loc.get('file', ''),
+                'class': loc.get('class') or loc.get('class_name'),
+                'function': loc.get('function') or loc.get('function_name'),
+            })
+        return normalized
+
+    # Handle string input (legacy format)
     # Remove triple backticks and whitespace
     raw_output = raw_output.strip("` \n")
 
diff --git a/src/tools/localization_finish.py b/src/tools/localization_finish.py
index a762904..513d371 100644
--- a/src/tools/localization_finish.py
+++ b/src/tools/localization_finish.py
@@ -1,6 +1,6 @@
 """Custom finish tool for code localization tasks.
 
-This tool allows the agent to submit localization results in a flexible format where:
+This tool allows the agent to submit localization results in a structured format where:
 - File path is required
 - Class name is optional
 - Function name is optional
@@ -10,7 +10,7 @@
 from typing import TYPE_CHECKING
 from collections.abc import Sequence
 
-from pydantic import Field
+from pydantic import BaseModel, Field, computed_field
 from rich.text import Text
 
 from openhands.sdk import (
@@ -26,44 +26,62 @@
     from openhands.sdk.conversation.base import BaseConversation
 
 
-class LocalizationFinishAction(Action):
-    """Action for submitting final localization results."""
-
-    message: str = Field(
-        description="""Your final localization results in the specified format.
+class CodeLocation(BaseModel):
+    """A single code location with optional class and function."""
 
-Format:
-```
-path/to/file1.py
-class: ClassName
-function: method_name
+    file: str = Field(description="Path to the file (required)")
+    class_name: str | None = Field(default=None, description="Class name (optional)")
+    function_name: str | None = Field(default=None, description="Function/method name (optional)")
 
-path/to/file2.py
-function: standalone_function
 
-path/to/file3.py
-```
-
-Requirements:
-- Each location must have a file path
-- Class name is optional (omit if change is file-level or function is standalone)
-- Function name is optional (omit if change is file-level or class-level only)
-- Wrap your answer in triple backticks
+class LocalizationFinishAction(Action):
+    """Action for submitting final localization results."""
 
-Example for different scenarios:
-- File-level change (imports, globals): Just list the file
-- Class-level change (new method, attributes): List file + class
-- Method change: List file + class + function
-- Standalone function: List file + function
+    locations: list[CodeLocation] = Field(
+        description="""List of code locations to modify. Each location must have:
+- file: Path to the file (required)
+- class_name: Class name (optional, omit for file-level or standalone functions)
+- function_name: Function/method name (optional, omit for file-level or class-level only)
+
+Examples:
+- File-level change: {"file": "src/config.py"}
+- Class-level change: {"file": "src/user.py", "class_name": "User"}
+- Standalone function: {"file": "src/utils.py", "function_name": "helper"}
+- Method in class: {"file": "src/parser.py", "class_name": "Parser", "function_name": "parse"}
 """
     )
 
+    @computed_field
+    @property
+    def message(self) -> str:
+        """Auto-generate message from locations for backward compatibility."""
+        if not self.locations:
+            return ""
+
+        lines = []
+        for loc in self.locations:
+            lines.append(loc.file)
+            if loc.class_name:
+                lines.append(f"class: {loc.class_name}")
+            if loc.function_name:
+                lines.append(f"function: {loc.function_name}")
+            lines.append("")  # Empty line between locations
+
+        return "```\n" + "\n".join(lines).rstrip() + "\n```"
+
     @property
     def visualize(self) -> Text:
         """Return Rich Text representation of this action."""
         content = Text()
         content.append("Submitting localization results:\n", style="bold blue")
-        content.append(self.message)
+        content.append(f"Found {len(self.locations)} location(s):\n", style="green")
+        for i, loc in enumerate(self.locations, 1):
+            content.append(f"  {i}. {loc.file}", style="cyan")
+            if loc.class_name:
+                content.append(f" → {loc.class_name}", style="yellow")
+            if loc.function_name:
+                content.append(f".{loc.function_name}", style="magenta")
+            content.append("\n")
         return content
 
 
@@ -87,91 +105,23 @@ def visualize(self) -> Text:
         return content
 
 
-def parse_localization_output(raw_output: str) -> list[dict]:
-    """Parse localization output with optional class and function.
-
-    This is an enhanced version of parse_simple_output that handles:
-    - File-only entries (no class or function)
-    - File + class entries (no function)
-    - File + function entries (no class)
-    - File + class + function entries
+def locations_to_dict_list(locations: list[CodeLocation]) -> list[dict]:
+    """Convert CodeLocation objects to dictionary format.
 
     Args:
-        raw_output: Raw text output to parse
+        locations: List of CodeLocation objects
 
     Returns:
-        List of dictionaries with 'file', 'class' (optional), 'function' (optional)
+        List of dictionaries with 'file', 'class', 'function' keys
     """
-    # Remove triple backticks and whitespace
-    raw_output = raw_output.strip("` \n")
-
-    locations = []
-    current_file = None
-    current_class = None
-    current_function = None
-
-    lines = raw_output.strip().split("\n")
-
-    for line in lines:
-        line = line.strip()
-
-        if not line:
-            # Empty line - save current location if we have a file
-            if current_file:
-                locations.append({
-                    "file": current_file,
-                    "class": current_class,
-                    "function": current_function,
-                })
-                current_file = None
-                current_class = None
-                current_function = None
-            continue
-
-        # Check if this is a file path (ends with .py)
-        if line.endswith(".py"):
-            # Save previous location if exists
-            if current_file:
-                locations.append({
-                    "file": current_file,
-                    "class": current_class,
-                    "function": current_function,
-                })
-            # Start new location
-            current_file = line
-            current_class = None
-            current_function = None
-            continue
-
-        # Parse class declaration
-        if line.startswith("class:"):
-            class_name = line[len("class:"):].strip()
-            current_class = class_name
-            continue
-
-        # Parse function/method declaration
-        if line.startswith("function:") or line.startswith("method:"):
-            func_text = line.split(":", 1)[1].strip()
-            func_name = func_text.split()[0].strip("() ")
-
-            # Check if function includes class prefix (e.g., "MyClass.my_method")
-            if "." in func_name:
-                parts = func_name.split(".", 1)
-                current_class = parts[0]
-                current_function = parts[1]
-            else:
-                current_function = func_name
-            continue
-
-    # Don't forget the last location
-    if current_file:
-        locations.append({
-            "file": current_file,
-            "class": current_class,
-            "function": current_function,
-        })
-
-    return locations
+    return [
+        {
+            "file": loc.file,
+            "class": loc.class_name,
+            "function": loc.function_name,
+        }
+        for loc in locations
+    ]
 
 
 class LocalizationFinishExecutor(ToolExecutor):
@@ -201,41 +151,40 @@ def __call__(
         """
 
         try:
-            # Parse the output to validate format
-            locations = parse_localization_output(action.message)
+            # Get locations from action (already structured)
+            locations = action.locations
             num_locs = len(locations)
 
-            # Validation 1: Check if any locations were found
+            # Validation 1: Check if any locations were provided
             if num_locs == 0:
                 return LocalizationFinishObservation(
                     success=False,
                     num_locations=0,
                     validation_message=(
-                        "No valid locations found. Please provide at least one file path "
-                        "in the correct format wrapped in triple backticks."
+                        "No locations provided. Please provide at least one location."
                     ),
                     details={"error": "empty_output"}
                 )
 
             # Validation 2: Check each location has a file path
             errors = []
-            for i, loc in enumerate(locations):
-                if not loc.get('file'):
-                    errors.append(f"Location {i+1} is missing a file path")
+            for i, loc in enumerate(locations, 1):
+                if not loc.file:
+                    errors.append(f"Location {i} is missing a file path")
 
             if errors:
                 return LocalizationFinishObservation(
                     success=False,
                     num_locations=0,
                     validation_message="\n".join(errors),
-                    details={"error": "missing_file_paths", "locations": locations}
+                    details={"error": "missing_file_paths", "locations": locations_to_dict_list(locations)}
                 )
 
             # Validation 3: Check file existence (if workspace provided)
             if self.workspace_dir:
                 missing_files = []
                 for loc in locations:
-                    file_path = loc['file']
+                    file_path = loc.file
                     full_path = os.path.join(self.workspace_dir, file_path)
                     if not os.path.exists(full_path):
                         missing_files.append(file_path)
@@ -252,7 +201,7 @@ def __call__(
                         details={
                             "warning": "files_not_found",
                             "missing_files": missing_files,
-                            "locations": locations
+                            "locations": locations_to_dict_list(locations)
                         }
                     )
 
@@ -261,24 +210,19 @@ def __call__(
                 success=True,
                 num_locations=num_locs,
                 validation_message=f"Successfully submitted {num_locs} location(s).",
-                details={"locations": locations}
+                details={"locations": locations_to_dict_list(locations)}
             )
 
         except Exception as e:
-            # Parsing failed
+            # Validation failed
             return LocalizationFinishObservation(
                 success=False,
                 num_locations=0,
                 validation_message=(
-                    f"Error parsing output: {str(e)}\n\n"
-                    "Please ensure your output follows the correct format:\n"
-                    "```\n"
-                    "path/to/file.py\n"
-                    "class: ClassName\n"
-                    "function: method_name\n"
-                    "```"
+                    f"Error validating locations: {str(e)}\n\n"
+                    "Please ensure each location has a valid file path."
                 ),
-                details={"error": "parse_error", "exception": str(e)}
+                details={"error": "validation_error", "exception": str(e)}
             )
 
 
@@ -287,31 +231,26 @@ def __call__(
 Use this tool when you have identified all relevant files, classes, and functions
 that need to be modified to address the issue described in the problem statement.
 
-Format your results as follows:
-```
-path/to/file1.py
-class: ClassName
-function: method_name
+Provide a structured list of locations. Each location must have:
+- file: Path to the file (required)
+- class_name: Class name (optional)
+- function_name: Function/method name (optional)
+
+Examples of different scenarios:
 
-path/to/file2.py
-function: standalone_function
+1. File-level change (imports, globals, new top-level classes):
+   {"file": "src/config.py"}
 
-path/to/file3.py
-```
+2. Class-level change (new methods, class attributes, entire class modified):
+   {"file": "src/models/user.py", "class_name": "User"}
 
-Requirements:
-- Wrap your output in triple backticks (```)
-- Each location must start with a file path
-- Class name is OPTIONAL - include only if the change is within a specific class
-- Function name is OPTIONAL - include only if the change is at function/method level
+3. Standalone function (top-level function, not in a class):
+   {"file": "src/utils/helpers.py", "function_name": "format_date"}
 
-When to omit class/function:
-- File-level only (imports, globals, new classes): List just the file
-- Class-level only (new methods, attributes): List file + class (no function)
-- Standalone function: List file + function (no class)
-- Method in class: List file + class + function
+4. Method in a class (specific method modification):
+   {"file": "src/parser.py", "class_name": "DataParser", "function_name": "parse_json"}
 
-The tool will validate your submission and provide feedback if the format is incorrect.
+The tool will validate file existence and provide feedback if issues are found.
 """
 
 
@@ -351,11 +290,11 @@ def create(
         ]
 
 
-@tool(name="finish")
+@tool(name="localization_finish")
 def _make_localization_finish_tool(conv_state) -> list[ToolDefinition]:
     """Create localization finish tool.
 
-    This replaces the default finish tool with a localization-specific version
-    that validates the output format.
+    This is a localization-specific finish tool that accepts structured locations
+    and validates the output format.
     """
     return LocalizationFinishTool.create(conv_state)
diff --git a/tests/tools/test_localization_finish.py b/tests/tools/test_localization_finish.py
index 2e348e9..d03c7aa 100644
--- a/tests/tools/test_localization_finish.py
+++ b/tests/tools/test_localization_finish.py
@@ -9,6 +9,7 @@
     LocalizationFinishAction,
     LocalizationFinishExecutor,
     LocalizationFinishObservation,
+    CodeLocation,
 )
 
 
@@ -26,23 +27,25 @@ def teardown_method(self):
         if os.path.exists(self.temp_dir):
             shutil.rmtree(self.temp_dir)
 
-    def test_empty_message(self):
-        """Test with empty message."""
-        action = LocalizationFinishAction(message="")
+    def test_empty_locations(self):
+        """Test with empty locations list."""
+        action = LocalizationFinishAction(locations=[])
         result = self.executor(action)
 
         assert result.success is False
         assert result.num_locations == 0
-        assert "No valid locations" in result.validation_message
+        assert "No locations provided" in result.validation_message
 
-    def test_malformed_input(self):
-        """Test with malformed input."""
-        action = LocalizationFinishAction(message="random text without proper format")
+    def test_missing_file_field(self):
+        """Test with location missing file field."""
+        # This would fail Pydantic validation, so we test with empty file string
+        action = LocalizationFinishAction(
+            locations=[CodeLocation(file="", class_name="MyClass")]
+        )
         result = self.executor(action)
 
         assert result.success is False
-        # Malformed input results in no locations found
-        assert "No valid locations" in result.validation_message
+        assert "missing a file path" in result.validation_message
 
     def test_file_only_valid(self):
         """Test valid file-level localization (no class or function)."""
@@ -50,9 +53,9 @@ def test_file_only_valid(self):
         file_path = "test_file.py"
         Path(self.temp_dir, file_path).touch()
 
-        action = LocalizationFinishAction(message=f"""```
-{file_path}
-```""")
+        action = LocalizationFinishAction(
+            locations=[CodeLocation(file=file_path)]
+        )
         result = self.executor(action)
 
         assert result.success is True
@@ -64,10 +67,9 @@ def test_file_and_class_valid(self):
         file_path = "test_file.py"
         Path(self.temp_dir, file_path).touch()
 
-        action = LocalizationFinishAction(message=f"""```
-{file_path}
-class: MyClass
-```""")
+        action = LocalizationFinishAction(
+            locations=[CodeLocation(file=file_path, class_name="MyClass")]
+        )
         result = self.executor(action)
 
         assert result.success is True
@@ -78,10 +80,9 @@ def test_file_and_function_valid(self):
         file_path = "test_file.py"
         Path(self.temp_dir, file_path).touch()
 
-        action = LocalizationFinishAction(message=f"""```
-{file_path}
-function: my_function
-```""")
+        action = LocalizationFinishAction(
+            locations=[CodeLocation(file=file_path, function_name="my_function")]
+        )
         result = self.executor(action)
 
         assert result.success is True
@@ -92,11 +93,13 @@ def test_complete_localization_valid(self):
         file_path = "test_file.py"
         Path(self.temp_dir, file_path).touch()
 
-        action = LocalizationFinishAction(message=f"""```
-{file_path}
-class: MyClass
-function: my_method
-```""")
+        action = LocalizationFinishAction(
+            locations=[CodeLocation(
+                file=file_path,
+                class_name="MyClass",
+                function_name="my_method"
+            )]
+        )
         result = self.executor(action)
 
         assert result.success is True
@@ -108,19 +111,14 @@ def test_multiple_locations_mixed_formats(self):
         for i in range(1, 5):
             Path(self.temp_dir, f"file{i}.py").touch()
 
-        action = LocalizationFinishAction(message="""```
-file1.py
-
-file2.py
-class: ClassA
-
-file3.py
-function: func_b
-
-file4.py
-class: ClassC
-function: method_d
-```""")
+        action = LocalizationFinishAction(
+            locations=[
+                CodeLocation(file="file1.py"),
+                CodeLocation(file="file2.py", class_name="ClassA"),
+                CodeLocation(file="file3.py", function_name="func_b"),
+                CodeLocation(file="file4.py", class_name="ClassC", function_name="method_d"),
+            ]
+        )
         result = self.executor(action)
 
         assert result.success is True
@@ -129,9 +127,9 @@ def test_multiple_locations_mixed_formats(self):
 
     def test_missing_file_warning(self):
         """Test that missing files trigger a warning."""
-        action = LocalizationFinishAction(message="""```
-nonexistent_file.py
-```""")
+        action = LocalizationFinishAction(
+            locations=[CodeLocation(file="nonexistent_file.py")]
+        )
         result = self.executor(action)
 
         # Should still parse but with warning
@@ -145,34 +143,25 @@ def test_some_files_missing(self):
         # Create only one file
         Path(self.temp_dir, "exists.py").touch()
 
-        action = LocalizationFinishAction(message="""```
-exists.py
-
-missing.py
-```""")
+        action = LocalizationFinishAction(
+            locations=[
+                CodeLocation(file="exists.py"),
+                CodeLocation(file="missing.py"),
+            ]
+        )
         result = self.executor(action)
 
         assert result.success is False
         assert result.num_locations == 2
         assert "missing.py" in result.validation_message
 
-    def test_no_backticks(self):
-        """Test that output without backticks fails parsing."""
-        action = LocalizationFinishAction(message="""file.py
-class: MyClass
-""")
-        result = self.executor(action)
-
-        # Should fail because parse_simple_output expects backticks
-        assert result.success is False
-
     def test_executor_without_workspace(self):
         """Test executor without workspace validation."""
         executor = LocalizationFinishExecutor(workspace_dir=None)
 
-        action = LocalizationFinishAction(message="""```
-any_file.py
-```""")
+        action = LocalizationFinishAction(
+            locations=[CodeLocation(file="any_file.py")]
+        )
         result = executor(action)
 
         # Should succeed without file existence check
@@ -186,43 +175,45 @@ def test_nested_file_path(self):
         nested_dir.mkdir(parents=True)
         Path(nested_dir, "helper.py").touch()
 
-        action = LocalizationFinishAction(message="""```
-src/utils/helper.py
-function: process
-```""")
+        action = LocalizationFinishAction(
+            locations=[
+                CodeLocation(file="src/utils/helper.py", function_name="process")
+            ]
+        )
         result = self.executor(action)
 
         assert result.success is True
         assert result.num_locations == 1
 
-    def test_dotted_function_name(self):
-        """Test with ClassName.method_name format."""
+    def test_structured_class_and_function(self):
+        """Test with class and function specified separately."""
         file_path = "test.py"
         Path(self.temp_dir, file_path).touch()
 
-        action = LocalizationFinishAction(message=f"""```
-{file_path}
-function: MyClass.my_method
-```""")
+        action = LocalizationFinishAction(
+            locations=[
+                CodeLocation(
+                    file=file_path,
+                    class_name="MyClass",
+                    function_name="my_method"
+                )
+            ]
+        )
         result = self.executor(action)
 
         assert result.success is True
         assert result.num_locations == 1
-        # Should parse as class=MyClass, function=my_method
+        # Verify structured data is preserved
         assert result.details["locations"][0]["class"] == "MyClass"
         assert result.details["locations"][0]["function"] == "my_method"
 
     def test_multiple_missing_files_truncated(self):
         """Test that many missing files are truncated in message."""
-        action = LocalizationFinishAction(message="""```
-file1.py
-file2.py
-file3.py
-file4.py
-file5.py
-file6.py
-file7.py
-```""")
+        action = LocalizationFinishAction(
+            locations=[
+                CodeLocation(file=f"file{i}.py") for i in range(1, 8)
+            ]
+        )
         result = self.executor(action)
 
         assert result.success is False

From 84a6ee5d419e60312309d1f57cdbc708484600b0 Mon Sep 17 00:00:00 2001
From: Leander Maben <leander.maben@gmail.com>
Date: Sun, 4 Jan 2026 17:41:03 -0500
Subject: [PATCH 3/3] Update generator to look for the finish tool

---
 src/generator/code_search_generator.py        | 42 +++++++++++++++++--
 .../file_localization/file_localization.py    | 15 ++++++-
 2 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/src/generator/code_search_generator.py b/src/generator/code_search_generator.py
index b3ec6bf..34cb3b8 100644
--- a/src/generator/code_search_generator.py
+++ b/src/generator/code_search_generator.py
@@ -54,6 +54,8 @@
     LLMConvertibleEvent,
     get_logger,
 )
+from openhands.sdk.event import ActionEvent
+from src.tools.localization_finish import LocalizationFinishAction
 
 from src.prompts.prompt_builder import get_instruction
 from src.utils.instance import clone_instance
@@ -74,6 +76,35 @@
 
 file_path = os.path.dirname(__file__)
 
+
+def get_structured_locations(events: List[Event]) -> Optional[List[Dict[str, Any]]]:
+    """Extract structured locations from LocalizationFinishAction in events.
+
+    Args:
+        events: List of conversation events to search through.
+
+    Returns:
+        List of location dicts with 'file', 'class', 'function' keys, or None if not found.
+    """
+    # Find the last LocalizationFinishAction
+    for event in reversed(events):
+        if (
+            isinstance(event, ActionEvent)
+            and event.source == "agent"
+            and isinstance(event.action, LocalizationFinishAction)
+        ):
+            # Extract structured locations from the action
+            locations = []
+            for loc in event.action.locations:
+                locations.append({
+                    "file": loc.file,
+                    "class": loc.class_name,
+                    "function": loc.function_name,
+                })
+            return locations
+    return None
+
+
 @ray.remote(num_cpus=0.01)
 def init_and_run(
     instance: dict,
@@ -156,6 +187,9 @@ def init_and_run(
     messages = list(map(lambda event: event.model_dump(), conversation.state.events))
     final_message = get_agent_final_response(conversation.state.events)
 
+    # Extract structured locations if available
+    structured_locations = get_structured_locations(conversation.state.events)
+
     # remove the workspace dir
     try:
         if workspace.exists():
@@ -179,7 +213,7 @@ def init_and_run(
         "end_timestamp": end_timestamp
     }
 
-    return messages, final_message, additional_attr
+    return messages, final_message, structured_locations, additional_attr
 
 
 class CodeSearchGenerator(SkyRLGymGenerator):
@@ -230,7 +264,7 @@ async def code_search_loop(
         instance = env_extras
         error = None
         try:
-            messages, final_message, additional_attr = await init_and_run.remote(
+            messages, final_message, structured_locations, additional_attr = await init_and_run.remote(
                 instance,
                 self.litellm_model_name,
                 # sweagent_config,
@@ -249,6 +283,7 @@ async def code_search_loop(
             error = str(e) + "\n" + traceback.format_exc()
             messages = []
             final_message = ""
+            structured_locations = None
             additional_attr = {
                 "wall_clock_duration": 0.0,
                 "start_timestamp": None,
@@ -269,6 +304,7 @@ async def code_search_loop(
             try:
                 input_args = {
                     "final_message": final_message,
+                    "structured_locations": structured_locations,
                     "messages": messages,
                     "instance": instance,
                 }
@@ -276,7 +312,7 @@ async def code_search_loop(
                 reward_fn = get_reward_function(reward_fn_args["fn"])
 
                 input_args = {
-                    **input_args, 
+                    **input_args,
                     **reward_fn_args.get("args", {})
                     }
 
diff --git a/src/rewards/file_localization/file_localization.py b/src/rewards/file_localization/file_localization.py
index 64e155a..2c64221 100644
--- a/src/rewards/file_localization/file_localization.py
+++ b/src/rewards/file_localization/file_localization.py
@@ -26,9 +26,15 @@ def file_localization_f1_reward(
     final_message: str,
     instance: dict,
     file_level_weight: float=1.0,
+    structured_locations=None,
     **kwargs
     ):
-    all_found_files, all_found_modules, all_found_entities = get_simple_results_from_raw_outputs(final_message)
+    # Use structured locations if available, otherwise parse final_message
+    if structured_locations is not None:
+        all_found_files, all_found_modules, all_found_entities = get_simple_results_from_raw_outputs(structured_locations)
+    else:
+        all_found_files, all_found_modules, all_found_entities = get_simple_results_from_raw_outputs(final_message)
+
     true_files = set(x[0] for x in ast.literal_eval(instance["target"]))
     file_level_score = compute_file_f1_score(all_found_files, true_files)
     weighted_file_score = file_level_weight * file_level_score
@@ -42,6 +48,7 @@ def multilevel_localization_f1_reward(
     file_level_weight: float=1.0,
     module_level_weight: float=1.0,
     entity_level_weight: float=1.0,
+    structured_locations=None,
     **kwargs
     ):
 
@@ -67,7 +74,11 @@ def multilevel_localization_f1_reward(
     gt_modules = set(gt_modules)
     gt_entities = set(gt_entities)
 
-    predicted_files, predicted_modules, predicted_entities = get_simple_results_from_raw_outputs(final_message)
+    # Use structured locations if available, otherwise parse final_message
+    if structured_locations is not None:
+        predicted_files, predicted_modules, predicted_entities = get_simple_results_from_raw_outputs(structured_locations)
+    else:
+        predicted_files, predicted_modules, predicted_entities = get_simple_results_from_raw_outputs(final_message)
 
     file_f1_score = compute_file_f1_score(predicted_files, gt_files)
     module_f1_score = compute_file_f1_score(predicted_modules, gt_modules)