From 27f2f50bad92dcaa4687eae74539064c68edfc7e Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Sat, 2 May 2026 09:48:04 +0530 Subject: [PATCH 1/8] Fix coding_env API signature compatibility Accept optional reset/step parameters in PythonCodeActEnv and add tests for episode_id and timeout_s handling. --- envs/coding_env/server/python_codeact_env.py | 20 ++++++++++++++++--- tests/envs/test_python_codeact_reset.py | 21 ++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index dbfc39e6a..edeb4441f 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -12,6 +12,7 @@ """ import uuid +from typing import Any, Optional from openenv.core.env_server.interfaces import Action, Environment, Observation @@ -50,7 +51,12 @@ def __init__( self._executor = PyExecutor() self._state = CodeState() - def reset(self) -> Observation: + def reset( + self, + seed: Optional[int] = None, + episode_id: Optional[str] = None, + **kwargs: Any, + ) -> Observation: """ Reset environment and start fresh execution session. @@ -58,7 +64,10 @@ def reset(self) -> Observation: Initial observation with empty stdout/stderr and exit_code=0 """ # Initialize fresh state - self._state = CodeState(episode_id=str(uuid.uuid4()), step_count=0) + self._state = CodeState( + episode_id=episode_id or str(uuid.uuid4()), + step_count=0, + ) # Add last_exit_code to state self._state.last_exit_code = 0 @@ -77,7 +86,12 @@ def reset(self) -> Observation: return self._apply_transform(observation) - def step(self, action: Action) -> Observation: + def step( + self, + action: Action, + timeout_s: Optional[float] = None, + **kwargs: Any, + ) -> Observation: """ Execute code action and return observation. diff --git a/tests/envs/test_python_codeact_reset.py b/tests/envs/test_python_codeact_reset.py index b4d8b59f1..bd0a767c9 100644 --- a/tests/envs/test_python_codeact_reset.py +++ b/tests/envs/test_python_codeact_reset.py @@ -166,3 +166,24 @@ def test_reset_changes_episode_id(): # Episode IDs should be different assert episode_id_1 != episode_id_2 + + +def test_reset_accepts_episode_id_override(): + """Test that reset() accepts an explicit episode_id.""" + env = PythonCodeActEnv() + + env.reset(episode_id="episode-123") + + assert env.state.episode_id == "episode-123" + assert env.state.step_count == 0 + + +def test_step_accepts_timeout_parameter(): + """Test that step() accepts timeout_s without raising TypeError.""" + env = PythonCodeActEnv() + env.reset() + + obs = env.step(CodeAction(code="print('ok')"), timeout_s=0.5) + + assert obs.exit_code == 0 + assert "ok" in obs.stdout From fe57c1c192e350b57b41b05b4a75c9d7a8ffad08 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Sat, 2 May 2026 10:06:56 +0530 Subject: [PATCH 2/8] Fix coding_env safety reward false positives with AST detection --- envs/coding_env/server/__init__.py | 16 +++++- envs/coding_env/server/transforms.py | 61 ++++++++++++++++------ tests/envs/test_coding_safety_transform.py | 57 ++++++++++++++++++++ 3 files changed, 115 insertions(+), 19 deletions(-) create mode 100644 tests/envs/test_coding_safety_transform.py diff --git a/envs/coding_env/server/__init__.py b/envs/coding_env/server/__init__.py index dab6b748a..41d01bba7 100644 --- a/envs/coding_env/server/__init__.py +++ b/envs/coding_env/server/__init__.py @@ -4,8 +4,20 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -"""Coding environment server components.""" +"""Coding environment server components. -from .python_codeact_env import PythonCodeActEnv +Keep imports lazy so utility modules (for example transforms) remain importable +without pulling optional runtime dependencies like smolagents. +""" + +from typing import Any __all__ = ["PythonCodeActEnv"] + + +def __getattr__(name: str) -> Any: + if name == "PythonCodeActEnv": + from .python_codeact_env import PythonCodeActEnv + + return PythonCodeActEnv + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index fc92e89ba..101f74cfe 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -7,7 +7,6 @@ """Transforms specific to coding environments.""" import ast -import re from openenv.core.env_server.base_transforms import CompositeTransform from openenv.core.env_server.interfaces import Transform @@ -21,14 +20,44 @@ class CodeSafetyTransform(Transform): def __init__(self, penalty: float = -1.0): self.penalty = penalty - self.dangerous_patterns = [ - r"import\s+os", - r"import\s+subprocess", - r"eval\(", - r"exec\(", - r"__import__", - r"open\(", - ] + + def _detect_violation(self, code: str) -> str | None: + """ + Detect dangerous operations using AST analysis. + + AST-based detection avoids false positives from harmless string literals + (e.g. ``print("import os")``) or similarly named user functions + (e.g. ``myopen()``). + """ + try: + tree = ast.parse(code) + except SyntaxError: + # Syntax quality is handled by CodeQualityTransform. + return None + + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + top_level_module = alias.name.split(".", 1)[0] + if top_level_module in {"os", "subprocess"}: + return f"import {top_level_module}" + + if isinstance(node, ast.ImportFrom) and node.module: + top_level_module = node.module.split(".", 1)[0] + if top_level_module in {"os", "subprocess"}: + return f"import {top_level_module}" + + if isinstance(node, ast.Call): + called_name: str | None = None + if isinstance(node.func, ast.Name): + called_name = node.func.id + elif isinstance(node.func, ast.Attribute): + called_name = node.func.attr + + if called_name in {"eval", "exec", "open", "__import__"}: + return called_name + + return None def __call__(self, observation: Observation) -> Observation: if not isinstance(observation, CodeObservation): @@ -36,14 +65,12 @@ def __call__(self, observation: Observation) -> Observation: if "last_code" in observation.metadata: code = observation.metadata["last_code"] - for pattern in self.dangerous_patterns: - if re.search(pattern, code): - observation.reward = self.penalty - observation.metadata["safety_violation"] = pattern - break - else: - if observation.reward is None: - observation.reward = 0.0 + violation = self._detect_violation(code) + if violation is not None: + observation.reward = self.penalty + observation.metadata["safety_violation"] = violation + elif observation.reward is None: + observation.reward = 0.0 return observation diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py new file mode 100644 index 000000000..9b4768cff --- /dev/null +++ b/tests/envs/test_coding_safety_transform.py @@ -0,0 +1,57 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""Tests for coding_env safety transform false-positive handling.""" + +import os +import sys +from pathlib import Path + +# Add the project root and src to the path +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) + +from envs.coding_env.models import CodeObservation +from envs.coding_env.server.transforms import CodeSafetyTransform + + +def _apply_safety_transform(code: str) -> CodeObservation: + transform = CodeSafetyTransform() + observation = CodeObservation( + stdout="", + stderr="", + exit_code=0, + metadata={"last_code": code}, + ) + transformed = transform(observation) + assert isinstance(transformed, CodeObservation) + return transformed + + +def test_blocks_real_dangerous_import(): + observation = _apply_safety_transform("import os\nprint('x')") + assert observation.reward == -1.0 + assert "safety_violation" in observation.metadata + + +def test_blocks_builtin_open_call(): + observation = _apply_safety_transform("with open('f.txt') as f:\n data = f.read()") + assert observation.reward == -1.0 + assert "safety_violation" in observation.metadata + + +def test_does_not_flag_string_literal_with_dangerous_text(): + observation = _apply_safety_transform("print('import os')") + assert observation.reward == 0.0 + assert "safety_violation" not in observation.metadata + + +def test_does_not_flag_user_defined_myopen_function(): + observation = _apply_safety_transform( + "def myopen():\n return 1\nresult = myopen()" + ) + assert observation.reward == 0.0 + assert "safety_violation" not in observation.metadata From fc49a724ae4cf9d6e7f8f5a24544e4ec1fccf87a Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Sat, 2 May 2026 11:01:16 +0530 Subject: [PATCH 3/8] Address Greptile findings for coding_env safety and step signature --- envs/coding_env/server/python_codeact_env.py | 2 -- envs/coding_env/server/transforms.py | 8 ++------ tests/envs/test_coding_safety_transform.py | 12 ++++++++++++ tests/envs/test_python_codeact_reset.py | 11 ----------- 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index edeb4441f..061642ed2 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -89,8 +89,6 @@ def reset( def step( self, action: Action, - timeout_s: Optional[float] = None, - **kwargs: Any, ) -> Observation: """ Execute code action and return observation. diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index 101f74cfe..a47b80ad8 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -48,14 +48,10 @@ def _detect_violation(self, code: str) -> str | None: return f"import {top_level_module}" if isinstance(node, ast.Call): - called_name: str | None = None if isinstance(node.func, ast.Name): called_name = node.func.id - elif isinstance(node.func, ast.Attribute): - called_name = node.func.attr - - if called_name in {"eval", "exec", "open", "__import__"}: - return called_name + if called_name in {"eval", "exec", "open", "__import__"}: + return called_name return None diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index 9b4768cff..4f59193a1 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -55,3 +55,15 @@ def test_does_not_flag_user_defined_myopen_function(): ) assert observation.reward == 0.0 assert "safety_violation" not in observation.metadata + + +def test_does_not_flag_attribute_method_named_exec(): + observation = _apply_safety_transform( + "class DB:\n" + " def exec(self, sql):\n" + " return sql\n" + "db = DB()\n" + "result = db.exec('SELECT 1')" + ) + assert observation.reward == 0.0 + assert "safety_violation" not in observation.metadata diff --git a/tests/envs/test_python_codeact_reset.py b/tests/envs/test_python_codeact_reset.py index bd0a767c9..e6c6ed113 100644 --- a/tests/envs/test_python_codeact_reset.py +++ b/tests/envs/test_python_codeact_reset.py @@ -176,14 +176,3 @@ def test_reset_accepts_episode_id_override(): assert env.state.episode_id == "episode-123" assert env.state.step_count == 0 - - -def test_step_accepts_timeout_parameter(): - """Test that step() accepts timeout_s without raising TypeError.""" - env = PythonCodeActEnv() - env.reset() - - obs = env.step(CodeAction(code="print('ok')"), timeout_s=0.5) - - assert obs.exit_code == 0 - assert "ok" in obs.stdout From 4e9c39712b3c74ac6a0421b3e196ab1a618d0654 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Thu, 14 May 2026 01:24:31 +0530 Subject: [PATCH 4/8] Address coding env API review issues --- envs/coding_env/server/app.py | 3 ++- envs/coding_env/server/python_codeact_env.py | 7 +++++++ envs/coding_env/server/transforms.py | 4 +++- tests/envs/test_coding_safety_transform.py | 16 +++++----------- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/envs/coding_env/server/app.py b/envs/coding_env/server/app.py index 4c712916b..2271b69de 100644 --- a/envs/coding_env/server/app.py +++ b/envs/coding_env/server/app.py @@ -21,9 +21,10 @@ python -m envs.coding_env.server.app """ +from openenv.core.env_server import create_app + from coding_env.models import CodeAction, CodeObservation from coding_env.server.python_codeact_env import PythonCodeActEnv -from openenv.core.env_server import create_app # Create the app with web interface and README integration # Pass the class (factory) instead of an instance for WebSocket session support diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index 061642ed2..2ebac30d7 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -89,12 +89,17 @@ def reset( def step( self, action: Action, + timeout_s: Optional[float] = None, + **kwargs: Any, ) -> Observation: """ Execute code action and return observation. Args: action: CodeAction containing the code to execute + timeout_s: Optional timeout accepted for Environment API compatibility. + PyExecutor does not currently expose per-call timeout control. + **kwargs: Additional step parameters accepted for API compatibility. Returns: CodeObservation with execution results (stdout, stderr, exit_code) @@ -102,6 +107,8 @@ def step( Raises: ValueError: If action is not a CodeAction instance """ + del timeout_s, kwargs + if not isinstance(action, CodeAction): raise ValueError(f"Expected CodeAction, got {type(action)}") diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index a47b80ad8..f5a856012 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -32,7 +32,9 @@ def _detect_violation(self, code: str) -> str | None: try: tree = ast.parse(code) except SyntaxError: - # Syntax quality is handled by CodeQualityTransform. + # Intentional trade-off: once the code is syntactically invalid, + # this AST-only safety pass cannot reliably inspect partial code. + # CodeQualityTransform applies the syntax penalty instead. return None for node in ast.walk(tree): diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index 4f59193a1..cf76503b4 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -6,16 +6,8 @@ """Tests for coding_env safety transform false-positive handling.""" -import os -import sys -from pathlib import Path - -# Add the project root and src to the path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) - -from envs.coding_env.models import CodeObservation -from envs.coding_env.server.transforms import CodeSafetyTransform +from coding_env.models import CodeObservation +from coding_env.server.transforms import CodeSafetyTransform def _apply_safety_transform(code: str) -> CodeObservation: @@ -38,7 +30,9 @@ def test_blocks_real_dangerous_import(): def test_blocks_builtin_open_call(): - observation = _apply_safety_transform("with open('f.txt') as f:\n data = f.read()") + observation = _apply_safety_transform( + "with open('f.txt') as f:\n data = f.read()" + ) assert observation.reward == -1.0 assert "safety_violation" in observation.metadata From 3e91ae1d376898e745e296d216ade469615a6832 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Thu, 14 May 2026 01:38:09 +0530 Subject: [PATCH 5/8] Tighten coding env API signatures --- envs/coding_env/server/python_codeact_env.py | 10 +--------- tests/envs/test_coding_safety_transform.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index 2ebac30d7..6bef79bd0 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -12,7 +12,7 @@ """ import uuid -from typing import Any, Optional +from typing import Optional from openenv.core.env_server.interfaces import Action, Environment, Observation @@ -55,7 +55,6 @@ def reset( self, seed: Optional[int] = None, episode_id: Optional[str] = None, - **kwargs: Any, ) -> Observation: """ Reset environment and start fresh execution session. @@ -89,17 +88,12 @@ def reset( def step( self, action: Action, - timeout_s: Optional[float] = None, - **kwargs: Any, ) -> Observation: """ Execute code action and return observation. Args: action: CodeAction containing the code to execute - timeout_s: Optional timeout accepted for Environment API compatibility. - PyExecutor does not currently expose per-call timeout control. - **kwargs: Additional step parameters accepted for API compatibility. Returns: CodeObservation with execution results (stdout, stderr, exit_code) @@ -107,8 +101,6 @@ def step( Raises: ValueError: If action is not a CodeAction instance """ - del timeout_s, kwargs - if not isinstance(action, CodeAction): raise ValueError(f"Expected CodeAction, got {type(action)}") diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index cf76503b4..323b61bb2 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -37,6 +37,24 @@ def test_blocks_builtin_open_call(): assert "safety_violation" in observation.metadata +def test_blocks_builtin_eval_call(): + observation = _apply_safety_transform("result = eval('1 + 1')") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "eval" + + +def test_blocks_builtin_exec_call(): + observation = _apply_safety_transform("exec('x = 1')") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "exec" + + +def test_blocks_builtin_import_call(): + observation = _apply_safety_transform("__import__('os')") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "__import__" + + def test_does_not_flag_string_literal_with_dangerous_text(): observation = _apply_safety_transform("print('import os')") assert observation.reward == 0.0 From 2d0bc8f4112ae375262a03109c3bf970592cb5aa Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Thu, 14 May 2026 07:46:48 +0530 Subject: [PATCH 6/8] Clarify coding env reset compatibility --- envs/coding_env/server/python_codeact_env.py | 12 +++++++++++- envs/coding_env/server/transforms.py | 7 ++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index 6bef79bd0..2804d6a65 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -12,7 +12,7 @@ """ import uuid -from typing import Optional +from typing import Any, Optional from openenv.core.env_server.interfaces import Action, Environment, Observation @@ -55,13 +55,23 @@ def reset( self, seed: Optional[int] = None, episode_id: Optional[str] = None, + **kwargs: Any, ) -> Observation: """ Reset environment and start fresh execution session. + Args: + seed: Accepted for API compatibility. This deterministic executor + has no random state to seed. + episode_id: Optional episode identifier override. + **kwargs: Forward-compatible reset parameters accepted by the base + Environment API but unused by this environment. + Returns: Initial observation with empty stdout/stderr and exit_code=0 """ + del seed, kwargs + # Initialize fresh state self._state = CodeState( episode_id=episode_id or str(uuid.uuid4()), diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index f5a856012..371d3c0d3 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -16,7 +16,12 @@ class CodeSafetyTransform(Transform): - """Evaluates code safety and assigns penalties for dangerous patterns.""" + """ + Assign penalties for obviously unsafe coding patterns. + + This is a reward heuristic, not a security sandbox. Container isolation is + the security boundary; this transform only shapes rewards for common cases. + """ def __init__(self, penalty: float = -1.0): self.penalty = penalty From 37abfb180eec8319f84ead24411b4d20def0799c Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Thu, 14 May 2026 07:57:37 +0530 Subject: [PATCH 7/8] Harden coding safety AST parsing --- envs/coding_env/server/transforms.py | 9 +++++---- tests/envs/test_coding_safety_transform.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index 371d3c0d3..2cc25b7fd 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -36,10 +36,11 @@ def _detect_violation(self, code: str) -> str | None: """ try: tree = ast.parse(code) - except SyntaxError: - # Intentional trade-off: once the code is syntactically invalid, - # this AST-only safety pass cannot reliably inspect partial code. - # CodeQualityTransform applies the syntax penalty instead. + except (SyntaxError, RecursionError, ValueError): + # Intentional trade-off: once the code is syntactically invalid or + # pathologically nested, this AST-only safety pass cannot reliably + # inspect partial code. CodeQualityTransform applies the syntax + # penalty instead. return None for node in ast.walk(tree): diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index 323b61bb2..0ded50ac9 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -29,6 +29,24 @@ def test_blocks_real_dangerous_import(): assert "safety_violation" in observation.metadata +def test_blocks_subprocess_import(): + observation = _apply_safety_transform("import subprocess") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "import subprocess" + + +def test_blocks_from_subprocess_import(): + observation = _apply_safety_transform("from subprocess import run") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "import subprocess" + + +def test_blocks_from_os_path_import(): + observation = _apply_safety_transform("from os.path import join") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "import os" + + def test_blocks_builtin_open_call(): observation = _apply_safety_transform( "with open('f.txt') as f:\n data = f.read()" From 493a4964bea73fb9b9518cbff80c955f5508df46 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Thu, 14 May 2026 08:12:57 +0530 Subject: [PATCH 8/8] Harden coding env quality transform --- envs/coding_env/server/python_codeact_env.py | 2 +- envs/coding_env/server/transforms.py | 2 +- tests/envs/test_coding_safety_transform.py | 22 +++++++++++++++++++- tests/envs/test_python_codeact_reset.py | 10 +++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index 2804d6a65..043838096 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -74,7 +74,7 @@ def reset( # Initialize fresh state self._state = CodeState( - episode_id=episode_id or str(uuid.uuid4()), + episode_id=episode_id if episode_id is not None else str(uuid.uuid4()), step_count=0, ) # Add last_exit_code to state diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index 2cc25b7fd..5baed87ce 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -108,7 +108,7 @@ def __call__(self, observation: Observation) -> Observation: # Check syntax (redundant but useful for quality assessment) try: ast.parse(code) - except SyntaxError: + except (SyntaxError, RecursionError, ValueError): quality_score += self.syntax_penalty # Add to existing reward diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index 0ded50ac9..9a0986f35 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -7,7 +7,7 @@ """Tests for coding_env safety transform false-positive handling.""" from coding_env.models import CodeObservation -from coding_env.server.transforms import CodeSafetyTransform +from coding_env.server.transforms import CodeQualityTransform, CodeSafetyTransform def _apply_safety_transform(code: str) -> CodeObservation: @@ -97,3 +97,23 @@ def test_does_not_flag_attribute_method_named_exec(): ) assert observation.reward == 0.0 assert "safety_violation" not in observation.metadata + + +def test_quality_transform_handles_ast_recursion_error(monkeypatch): + def raise_recursion_error(_code: str): + raise RecursionError("pathologically nested code") + + monkeypatch.setattr("coding_env.server.transforms.ast.parse", raise_recursion_error) + + transform = CodeQualityTransform(concise_bonus=0.0, syntax_penalty=-0.2) + observation = CodeObservation( + stdout="", + stderr="", + exit_code=0, + metadata={"last_code": "x = 1"}, + ) + + transformed = transform(observation) + + assert isinstance(transformed, CodeObservation) + assert transformed.reward == -0.2 diff --git a/tests/envs/test_python_codeact_reset.py b/tests/envs/test_python_codeact_reset.py index e6c6ed113..55bd9c03b 100644 --- a/tests/envs/test_python_codeact_reset.py +++ b/tests/envs/test_python_codeact_reset.py @@ -176,3 +176,13 @@ def test_reset_accepts_episode_id_override(): assert env.state.episode_id == "episode-123" assert env.state.step_count == 0 + + +def test_reset_preserves_empty_episode_id_override(): + """Test that reset() preserves any explicit non-None episode_id.""" + env = PythonCodeActEnv() + + env.reset(episode_id="") + + assert env.state.episode_id == "" + assert env.state.step_count == 0