diff --git a/configs/skyrl-experiments/read-only.yaml b/configs/skyrl-experiments/read-only.yaml index 4d5525a..4f48460 100644 --- a/configs/skyrl-experiments/read-only.yaml +++ b/configs/skyrl-experiments/read-only.yaml @@ -9,6 +9,7 @@ tools: - glob - grep - terminal + - localization_finish prompts: system_prompt: "templates/system_prompt.j2" diff --git a/configs/skyrl-experiments/terminal.yaml b/configs/skyrl-experiments/terminal.yaml index 6ad62c6..613ad3f 100644 --- a/configs/skyrl-experiments/terminal.yaml +++ b/configs/skyrl-experiments/terminal.yaml @@ -7,6 +7,7 @@ reward: tools: - terminal + - localization_finish prompts: system_prompt: "templates/system_prompt.j2" diff --git a/src/generator/code_search_generator.py b/src/generator/code_search_generator.py index b3ec6bf..34cb3b8 100644 --- a/src/generator/code_search_generator.py +++ b/src/generator/code_search_generator.py @@ -54,6 +54,8 @@ LLMConvertibleEvent, get_logger, ) +from openhands.sdk.event import ActionEvent +from src.tools.localization_finish import LocalizationFinishAction from src.prompts.prompt_builder import get_instruction from src.utils.instance import clone_instance @@ -74,6 +76,35 @@ file_path = os.path.dirname(__file__) + +def get_structured_locations(events: List[Event]) -> Optional[List[Dict[str, Any]]]: + """Extract structured locations from LocalizationFinishAction in events. + + Args: + events: List of conversation events to search through. + + Returns: + List of location dicts with 'file', 'class', 'function' keys, or None if not found. + """ + # Find the last LocalizationFinishAction + for event in reversed(events): + if ( + isinstance(event, ActionEvent) + and event.source == "agent" + and isinstance(event.action, LocalizationFinishAction) + ): + # Extract structured locations from the action + locations = [] + for loc in event.action.locations: + locations.append({ + "file": loc.file, + "class": loc.class_name, + "function": loc.function_name, + }) + return locations + return None + + @ray.remote(num_cpus=0.01) def init_and_run( instance: dict, @@ -156,6 +187,9 @@ def init_and_run( messages = list(map(lambda event: event.model_dump(), conversation.state.events)) final_message = get_agent_final_response(conversation.state.events) + # Extract structured locations if available + structured_locations = get_structured_locations(conversation.state.events) + # remove the workspace dir try: if workspace.exists(): @@ -179,7 +213,7 @@ def init_and_run( "end_timestamp": end_timestamp } - return messages, final_message, additional_attr + return messages, final_message, structured_locations, additional_attr class CodeSearchGenerator(SkyRLGymGenerator): @@ -230,7 +264,7 @@ async def code_search_loop( instance = env_extras error = None try: - messages, final_message, additional_attr = await init_and_run.remote( + messages, final_message, structured_locations, additional_attr = await init_and_run.remote( instance, self.litellm_model_name, # sweagent_config, @@ -249,6 +283,7 @@ async def code_search_loop( error = str(e) + "\n" + traceback.format_exc() messages = [] final_message = "" + structured_locations = None additional_attr = { "wall_clock_duration": 0.0, "start_timestamp": None, @@ -269,6 +304,7 @@ async def code_search_loop( try: input_args = { "final_message": final_message, + "structured_locations": structured_locations, "messages": messages, "instance": instance, } @@ -276,7 +312,7 @@ async def code_search_loop( reward_fn = get_reward_function(reward_fn_args["fn"]) input_args = { - **input_args, + **input_args, **reward_fn_args.get("args", {}) } diff --git a/src/prompts/templates/system_prompt.j2 b/src/prompts/templates/system_prompt.j2 index f5a67aa..a79f2d1 100644 --- a/src/prompts/templates/system_prompt.j2 +++ b/src/prompts/templates/system_prompt.j2 @@ -36,12 +36,52 @@ You are given access to the codebase in a linux file system. * Read targeted line ranges around matches using `sed -n 'START,ENDp'` * Only read additional chunks if the initial sections are relevant -### Final Answer Format (REQUIRED) -- You MUST return your final answer in backticks ``` ... ``` -- Format: ```\nfull_path1/file1.py\nclass: MyClass1\nfunction: my_function1\n\nfull_path2/file2.py\nfunction: MyClass2.my_function2\n\nfull_path3/file3.py\nfunction: my_function3\n``` -- List one file path per line -- Use relative paths as they appear in the repository -- DO NOT include any other text inside the backticks +### Submitting Your Answer (REQUIRED) + +When you have identified all relevant locations, use the `localization_finish` tool to submit your results. + +**Format Requirements:** +Submit a structured list of locations. Each location is a JSON object with: +- `file`: Path to the file (REQUIRED) +- `class_name`: Class name (OPTIONAL - omit for file-level or standalone functions) +- `function_name`: Function/method name (OPTIONAL - omit for file-level or class-level only) + +**When to include what:** +- File-level changes (imports, globals, new top-level classes): Just `file` +- Class-level changes (new methods, attributes, entire class): `file` + `class_name` +- Standalone function (top-level function): `file` + `function_name` +- Method in a class: `file` + `class_name` + `function_name` + +**Example formats:** + +1. File-only (imports, globals, new class): +```json +{"file": "path/to/file1.py"} +``` + +2. File + Class (class-level changes): +```json +{"file": "path/to/file2.py", "class_name": "MyClass"} +``` + +3. File + Function (standalone function): +```json +{"file": "path/to/file3.py", "function_name": "my_function"} +``` + +4. File + Class + Function (method): +```json +{"file": "path/to/file4.py", "class_name": "MyClass", "function_name": "my_method"} +``` + +5. Multiple locations: +```json +[ + {"file": "src/parser.py", "class_name": "DataParser", "function_name": "parse_json"}, + {"file": "src/models/user.py", "class_name": "User"}, + {"file": "src/config.py"} +] +``` ## SEARCH STRATEGY @@ -61,32 +101,46 @@ You are given access to the codebase in a linux file system. 3. **Final Verification**: Confirm your file list - Verify each candidate file is truly relevant - Ensure you haven't missed related files - - Return your answer in backticks ``` ... ``` + - Use the `localization_finish` tool to submit your answer ## CRITICAL RULES - NEVER exceed 5 parallel bash tool calls in a single turn -- NEVER respond without wrapping your file list in backticks ``` +- ALWAYS use the `localization_finish` tool when you're done - ALWAYS use bash tool to search (do not guess file locations) - NEVER read entire large files - always read in chunks (100-line ranges) - Check file size with `wc -l` before reading - Read file contents in chunks to verify relevance before including them - Return file paths as they appear in the repository. Do not begin the path with "./" - Aim for high precision (all files relevant) and high recall (no relevant files missed) +- Class and function names are OPTIONAL - only include when changes are at that level -## EXAMPLE OUTPUT +## EXAMPLE SUBMISSION -After exploring the codebase, return your answer like this: +When ready, call the `localization_finish` tool with your findings: -Your final output should list the locations requiring modification, wrapped with triple backticks ``` -Each location should include the file path, class name (if applicable), and function name. Here is an example Output: +```json +[ + { + "file": "src/utils/parser.py", + "class_name": "DataParser", + "function_name": "parse_json" + }, + { + "file": "src/models/user.py", + "class_name": "User" + }, + { + "file": "src/config.py" + }, + { + "file": "src/api/endpoints.py", + "function_name": "handle_request" + } +] ``` -full_path1/file1.py -class: MyClass1 -function: my_function1 - -full_path2/file2.py -function: MyClass2.my_function2 -full_path3/file3.py -function: my_function3 -``` \ No newline at end of file +**Note:** In this example: +- `parser.py` has a specific method change (file + class + function) +- `user.py` has a class-level change (file + class only) +- `config.py` has file-level changes (file only) +- `endpoints.py` has a standalone function change (file + function only) \ No newline at end of file diff --git a/src/rewards/file_localization/file_localization.py b/src/rewards/file_localization/file_localization.py index 64e155a..2c64221 100644 --- a/src/rewards/file_localization/file_localization.py +++ b/src/rewards/file_localization/file_localization.py @@ -26,9 +26,15 @@ def file_localization_f1_reward( final_message: str, instance: dict, file_level_weight: float=1.0, + structured_locations=None, **kwargs ): - all_found_files, all_found_modules, all_found_entities = get_simple_results_from_raw_outputs(final_message) + # Use structured locations if available, otherwise parse final_message + if structured_locations is not None: + all_found_files, all_found_modules, all_found_entities = get_simple_results_from_raw_outputs(structured_locations) + else: + all_found_files, all_found_modules, all_found_entities = get_simple_results_from_raw_outputs(final_message) + true_files = set(x[0] for x in ast.literal_eval(instance["target"])) file_level_score = compute_file_f1_score(all_found_files, true_files) weighted_file_score = file_level_weight * file_level_score @@ -42,6 +48,7 @@ def multilevel_localization_f1_reward( file_level_weight: float=1.0, module_level_weight: float=1.0, entity_level_weight: float=1.0, + structured_locations=None, **kwargs ): @@ -67,7 +74,11 @@ def multilevel_localization_f1_reward( gt_modules = set(gt_modules) gt_entities = set(gt_entities) - predicted_files, predicted_modules, predicted_entities = get_simple_results_from_raw_outputs(final_message) + # Use structured locations if available, otherwise parse final_message + if structured_locations is not None: + predicted_files, predicted_modules, predicted_entities = get_simple_results_from_raw_outputs(structured_locations) + else: + predicted_files, predicted_modules, predicted_entities = get_simple_results_from_raw_outputs(final_message) file_f1_score = compute_file_f1_score(predicted_files, gt_files) module_f1_score = compute_file_f1_score(predicted_modules, gt_modules) diff --git a/src/rewards/file_localization/module_rewards.py b/src/rewards/file_localization/module_rewards.py index 8be9d18..c0fd68c 100644 --- a/src/rewards/file_localization/module_rewards.py +++ b/src/rewards/file_localization/module_rewards.py @@ -1,17 +1,17 @@ import logging -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Union -def parse_simple_output(raw_output: str) -> List[Dict[str, str]]: +def parse_simple_output(raw_output: Union[str, List[Dict[str, str]]]) -> List[Dict[str, str]]: """ Parse simplified agent output containing filename, optional class, and function. Args: - raw_output: Raw text output from the agent + raw_output: Either a raw text string OR a list of location dicts (for structured input) Returns: List of dictionaries with keys: 'file', 'class' (optional), 'function' - Example input format: + Example string input format: ``` path/to/file1.py class: MyClass @@ -21,12 +21,32 @@ def parse_simple_output(raw_output: str) -> List[Dict[str, str]]: function: standalone_function ``` - Example output: + Example structured input format: + [ + {'file': 'path/to/file1.py', 'class': 'MyClass', 'function': 'my_method'}, + {'file': 'path/to/file2.py', 'class': None, 'function': 'standalone_function'} + ] + + Example output (same for both): [ {'file': 'path/to/file1.py', 'class': 'MyClass', 'function': 'my_method'}, {'file': 'path/to/file2.py', 'class': None, 'function': 'standalone_function'} ] """ + # Handle structured input (list of dicts) + if isinstance(raw_output, list): + # Already in the correct format (or close to it) + # Normalize field names: class_name -> class, function_name -> function + normalized = [] + for loc in raw_output: + normalized.append({ + 'file': loc.get('file', ''), + 'class': loc.get('class') or loc.get('class_name'), + 'function': loc.get('function') or loc.get('function_name'), + }) + return normalized + + # Handle string input (legacy format) # Remove triple backticks and whitespace raw_output = raw_output.strip("` \n") diff --git a/src/tools/localization_finish.py b/src/tools/localization_finish.py new file mode 100644 index 0000000..513d371 --- /dev/null +++ b/src/tools/localization_finish.py @@ -0,0 +1,300 @@ +"""Custom finish tool for code localization tasks. + +This tool allows the agent to submit localization results in a structured format where: +- File path is required +- Class name is optional +- Function name is optional +""" + +import os +from typing import TYPE_CHECKING +from collections.abc import Sequence + +from pydantic import BaseModel, Field, computed_field +from rich.text import Text + +from openhands.sdk import ( + Action, + Observation, + ToolDefinition, +) +from openhands.sdk.tool import ToolExecutor + +from src.tools import tool + +if TYPE_CHECKING: + from openhands.sdk.conversation.base import BaseConversation + + +class CodeLocation(BaseModel): + """A single code location with optional class and function.""" + + file: str = Field(description="Path to the file (required)") + class_name: str | None = Field(default=None, description="Class name (optional)") + function_name: str | None = Field(default=None, description="Function/method name (optional)") + + +class LocalizationFinishAction(Action): + """Action for submitting final localization results.""" + + locations: list[CodeLocation] = Field( + description="""List of code locations to modify. Each location must have: +- file: Path to the file (required) +- class_name: Class name (optional, omit for file-level or standalone functions) +- function_name: Function/method name (optional, omit for file-level or class-level only) + +Examples: +- File-level change: {"file": "src/config.py"} +- Class-level change: {"file": "src/user.py", "class_name": "User"} +- Standalone function: {"file": "src/utils.py", "function_name": "helper"} +- Method in class: {"file": "src/parser.py", "class_name": "Parser", "function_name": "parse"} +""" + ) + + @computed_field + @property + def message(self) -> str: + """Auto-generate message from locations for backward compatibility.""" + if not self.locations: + return "" + + lines = [] + for loc in self.locations: + lines.append(loc.file) + if loc.class_name: + lines.append(f"class: {loc.class_name}") + if loc.function_name: + lines.append(f"function: {loc.function_name}") + lines.append("") # Empty line between locations + + return "```\n" + "\n".join(lines).rstrip() + "\n```" + + @property + def visualize(self) -> Text: + """Return Rich Text representation of this action.""" + content = Text() + content.append("Submitting localization results:\n", style="bold blue") + content.append(f"Found {len(self.locations)} location(s):\n", style="green") + for i, loc in enumerate(self.locations, 1): + content.append(f" {i}. {loc.file}", style="cyan") + if loc.class_name: + content.append(f" → {loc.class_name}", style="yellow") + if loc.function_name: + content.append(f".{loc.function_name}", style="magenta") + content.append("\n") + return content + + +class LocalizationFinishObservation(Observation): + """Observation returned after submitting localization results.""" + + success: bool = Field(default=True, description="Whether submission was successful") + num_locations: int = Field(default=0, description="Number of locations submitted") + validation_message: str = Field(default="", description="Validation feedback") + details: dict = Field(default_factory=dict, description="Additional details") + + @property + def visualize(self) -> Text: + """Return Rich Text representation of this observation.""" + content = Text() + if self.success: + content.append(f"✓ Successfully submitted {self.num_locations} location(s)\n", style="bold green") + else: + content.append(f"✗ Submission failed\n", style="bold red") + content.append(f"{self.validation_message}\n", style="yellow") + return content + + +def locations_to_dict_list(locations: list[CodeLocation]) -> list[dict]: + """Convert CodeLocation objects to dictionary format. + + Args: + locations: List of CodeLocation objects + + Returns: + List of dictionaries with 'file', 'class', 'function' keys + """ + return [ + { + "file": loc.file, + "class": loc.class_name, + "function": loc.function_name, + } + for loc in locations + ] + + +class LocalizationFinishExecutor(ToolExecutor): + """Executor for localization finish tool with validation.""" + + def __init__(self, workspace_dir: str | None = None): + """Initialize the executor. + + Args: + workspace_dir: Optional workspace directory to validate file existence. + """ + self.workspace_dir = workspace_dir + + def __call__( + self, + action: LocalizationFinishAction, + conversation: "BaseConversation | None" = None, + ) -> LocalizationFinishObservation: + """Execute the finish action with validation. + + Args: + action: The localization finish action to execute + conversation: Optional conversation context + + Returns: + LocalizationFinishObservation with validation results + """ + + try: + # Get locations from action (already structured) + locations = action.locations + num_locs = len(locations) + + # Validation 1: Check if any locations were provided + if num_locs == 0: + return LocalizationFinishObservation( + success=False, + num_locations=0, + validation_message=( + "No locations provided. Please provide at least one location." + ), + details={"error": "empty_output"} + ) + + # Validation 2: Check each location has a file path + errors = [] + for i, loc in enumerate(locations, 1): + if not loc.file: + errors.append(f"Location {i} is missing a file path") + + if errors: + return LocalizationFinishObservation( + success=False, + num_locations=0, + validation_message="\n".join(errors), + details={"error": "missing_file_paths", "locations": locations_to_dict_list(locations)} + ) + + # Validation 3: Check file existence (if workspace provided) + if self.workspace_dir: + missing_files = [] + for loc in locations: + file_path = loc.file + full_path = os.path.join(self.workspace_dir, file_path) + if not os.path.exists(full_path): + missing_files.append(file_path) + + if missing_files: + return LocalizationFinishObservation( + success=False, + num_locations=num_locs, + validation_message=( + f"Warning: {len(missing_files)} file(s) not found in workspace:\n" + + "\n".join(f" - {f}" for f in missing_files[:5]) + + (f"\n ... and {len(missing_files) - 5} more" if len(missing_files) > 5 else "") + ), + details={ + "warning": "files_not_found", + "missing_files": missing_files, + "locations": locations_to_dict_list(locations) + } + ) + + # Success! + return LocalizationFinishObservation( + success=True, + num_locations=num_locs, + validation_message=f"Successfully submitted {num_locs} location(s).", + details={"locations": locations_to_dict_list(locations)} + ) + + except Exception as e: + # Validation failed + return LocalizationFinishObservation( + success=False, + num_locations=0, + validation_message=( + f"Error validating locations: {str(e)}\n\n" + "Please ensure each location has a valid file path." + ), + details={"error": "validation_error", "exception": str(e)} + ) + + +TOOL_DESCRIPTION = """Submit your final code localization results. + +Use this tool when you have identified all relevant files, classes, and functions +that need to be modified to address the issue described in the problem statement. + +Provide a structured list of locations. Each location must have: +- file: Path to the file (required) +- class_name: Class name (optional) +- function_name: Function/method name (optional) + +Examples of different scenarios: + +1. File-level change (imports, globals, new top-level classes): + {"file": "src/config.py"} + +2. Class-level change (new methods, class attributes, entire class modified): + {"file": "src/models/user.py", "class_name": "User"} + +3. Standalone function (top-level function, not in a class): + {"file": "src/utils/helpers.py", "function_name": "format_date"} + +4. Method in a class (specific method modification): + {"file": "src/parser.py", "class_name": "DataParser", "function_name": "parse_json"} + +The tool will validate file existence and provide feedback if issues are found. +""" + + +class LocalizationFinishTool(ToolDefinition[LocalizationFinishAction, LocalizationFinishObservation]): + """Tool for submitting final code localization results.""" + + @classmethod + def create( + cls, + conv_state, + workspace_dir: str | None = None, + **params + ) -> Sequence["LocalizationFinishTool"]: + """Create LocalizationFinishTool instance. + + Args: + conv_state: Conversation state (provides workspace info) + workspace_dir: Optional workspace directory override + **params: Additional parameters + + Returns: + A sequence containing a single LocalizationFinishTool instance. + """ + # Get workspace from conv_state if not provided + if workspace_dir is None and hasattr(conv_state, 'workspace'): + workspace_dir = str(conv_state.workspace.working_dir) + + executor = LocalizationFinishExecutor(workspace_dir=workspace_dir) + + return [ + cls( + action_type=LocalizationFinishAction, + observation_type=LocalizationFinishObservation, + description=TOOL_DESCRIPTION, + executor=executor, + ) + ] + + +@tool(name="localization_finish") +def _make_localization_finish_tool(conv_state) -> list[ToolDefinition]: + """Create localization finish tool. + + This is a localization-specific finish tool that accepts structured locations + and validates the output format. + """ + return LocalizationFinishTool.create(conv_state) diff --git a/tests/tools/test_localization_finish.py b/tests/tools/test_localization_finish.py new file mode 100644 index 0000000..d03c7aa --- /dev/null +++ b/tests/tools/test_localization_finish.py @@ -0,0 +1,254 @@ +"""Tests for the localization finish tool.""" + +import os +import tempfile +import pytest +from pathlib import Path + +from src.tools.localization_finish import ( + LocalizationFinishAction, + LocalizationFinishExecutor, + LocalizationFinishObservation, + CodeLocation, +) + + +class TestLocalizationFinishExecutor: + """Tests for LocalizationFinishExecutor.""" + + def setup_method(self): + """Create executor and temp workspace for each test.""" + self.temp_dir = tempfile.mkdtemp() + self.executor = LocalizationFinishExecutor(workspace_dir=self.temp_dir) + + def teardown_method(self): + """Clean up temp workspace.""" + import shutil + if os.path.exists(self.temp_dir): + shutil.rmtree(self.temp_dir) + + def test_empty_locations(self): + """Test with empty locations list.""" + action = LocalizationFinishAction(locations=[]) + result = self.executor(action) + + assert result.success is False + assert result.num_locations == 0 + assert "No locations provided" in result.validation_message + + def test_missing_file_field(self): + """Test with location missing file field.""" + # This would fail Pydantic validation, so we test with empty file string + action = LocalizationFinishAction( + locations=[CodeLocation(file="", class_name="MyClass")] + ) + result = self.executor(action) + + assert result.success is False + assert "missing a file path" in result.validation_message + + def test_file_only_valid(self): + """Test valid file-level localization (no class or function).""" + # Create the file in workspace + file_path = "test_file.py" + Path(self.temp_dir, file_path).touch() + + action = LocalizationFinishAction( + locations=[CodeLocation(file=file_path)] + ) + result = self.executor(action) + + assert result.success is True + assert result.num_locations == 1 + assert "Successfully submitted" in result.validation_message + + def test_file_and_class_valid(self): + """Test valid class-level localization (no function).""" + file_path = "test_file.py" + Path(self.temp_dir, file_path).touch() + + action = LocalizationFinishAction( + locations=[CodeLocation(file=file_path, class_name="MyClass")] + ) + result = self.executor(action) + + assert result.success is True + assert result.num_locations == 1 + + def test_file_and_function_valid(self): + """Test valid function-level localization (no class).""" + file_path = "test_file.py" + Path(self.temp_dir, file_path).touch() + + action = LocalizationFinishAction( + locations=[CodeLocation(file=file_path, function_name="my_function")] + ) + result = self.executor(action) + + assert result.success is True + assert result.num_locations == 1 + + def test_complete_localization_valid(self): + """Test complete localization with file, class, and function.""" + file_path = "test_file.py" + Path(self.temp_dir, file_path).touch() + + action = LocalizationFinishAction( + locations=[CodeLocation( + file=file_path, + class_name="MyClass", + function_name="my_method" + )] + ) + result = self.executor(action) + + assert result.success is True + assert result.num_locations == 1 + + def test_multiple_locations_mixed_formats(self): + """Test multiple locations with different formats.""" + # Create files + for i in range(1, 5): + Path(self.temp_dir, f"file{i}.py").touch() + + action = LocalizationFinishAction( + locations=[ + CodeLocation(file="file1.py"), + CodeLocation(file="file2.py", class_name="ClassA"), + CodeLocation(file="file3.py", function_name="func_b"), + CodeLocation(file="file4.py", class_name="ClassC", function_name="method_d"), + ] + ) + result = self.executor(action) + + assert result.success is True + assert result.num_locations == 4 + assert "Successfully submitted 4 location(s)" in result.validation_message + + def test_missing_file_warning(self): + """Test that missing files trigger a warning.""" + action = LocalizationFinishAction( + locations=[CodeLocation(file="nonexistent_file.py")] + ) + result = self.executor(action) + + # Should still parse but with warning + assert result.success is False + assert result.num_locations == 1 + assert "not found in workspace" in result.validation_message + assert "nonexistent_file.py" in result.validation_message + + def test_some_files_missing(self): + """Test when some files exist and some don't.""" + # Create only one file + Path(self.temp_dir, "exists.py").touch() + + action = LocalizationFinishAction( + locations=[ + CodeLocation(file="exists.py"), + CodeLocation(file="missing.py"), + ] + ) + result = self.executor(action) + + assert result.success is False + assert result.num_locations == 2 + assert "missing.py" in result.validation_message + + def test_executor_without_workspace(self): + """Test executor without workspace validation.""" + executor = LocalizationFinishExecutor(workspace_dir=None) + + action = LocalizationFinishAction( + locations=[CodeLocation(file="any_file.py")] + ) + result = executor(action) + + # Should succeed without file existence check + assert result.success is True + assert result.num_locations == 1 + + def test_nested_file_path(self): + """Test with nested file paths.""" + # Create nested structure + nested_dir = Path(self.temp_dir, "src", "utils") + nested_dir.mkdir(parents=True) + Path(nested_dir, "helper.py").touch() + + action = LocalizationFinishAction( + locations=[ + CodeLocation(file="src/utils/helper.py", function_name="process") + ] + ) + result = self.executor(action) + + assert result.success is True + assert result.num_locations == 1 + + def test_structured_class_and_function(self): + """Test with class and function specified separately.""" + file_path = "test.py" + Path(self.temp_dir, file_path).touch() + + action = LocalizationFinishAction( + locations=[ + CodeLocation( + file=file_path, + class_name="MyClass", + function_name="my_method" + ) + ] + ) + result = self.executor(action) + + assert result.success is True + assert result.num_locations == 1 + # Verify structured data is preserved + assert result.details["locations"][0]["class"] == "MyClass" + assert result.details["locations"][0]["function"] == "my_method" + + def test_multiple_missing_files_truncated(self): + """Test that many missing files are truncated in message.""" + action = LocalizationFinishAction( + locations=[ + CodeLocation(file=f"file{i}.py") for i in range(1, 8) + ] + ) + result = self.executor(action) + + assert result.success is False + assert result.num_locations == 7 + # Should show truncation message + assert "and 2 more" in result.validation_message + + +class TestLocalizationFinishObservation: + """Tests for LocalizationFinishObservation visualization.""" + + def test_success_visualization(self): + """Test visualization of successful observation.""" + obs = LocalizationFinishObservation( + success=True, + num_locations=3, + validation_message="Successfully submitted 3 location(s)." + ) + text = obs.visualize + + assert "✓" in str(text) + assert "3 location(s)" in str(text) + + def test_failure_visualization(self): + """Test visualization of failed observation.""" + obs = LocalizationFinishObservation( + success=False, + num_locations=0, + validation_message="Missing file paths" + ) + text = obs.visualize + + assert "✗" in str(text) + assert "Missing file paths" in str(text) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"])