From 50630ca130aed0c6da5e4fc8ee5f0d6a86154795 Mon Sep 17 00:00:00 2001 From: Sami Marreed Date: Wed, 20 May 2026 16:28:46 +0300 Subject: [PATCH 1/7] refactor(agent-core): extract shared agent logic into cuga_agent_core module --- .claude/settings.json | 9 + .../nodes/api/code_agent/code_act_agent.py | 362 --------- .../code_agent/tests/test_code_act_agent.py | 58 -- .../nodes/cuga_agent_core/__init__.py | 0 .../nodes/cuga_agent_core/code_extraction.py | 89 +++ .../nodes/cuga_agent_core/execution_policy.py | 141 ++++ .../nodes/cuga_agent_core/graph_nodes.py | 280 +++++++ .../nodes/cuga_agent_core/runtime_tools.py | 155 ++++ .../nodes/cuga_agent_core/shared_graph.py | 59 ++ .../nodes/cuga_agent_core/shared_nodes.py | 249 +++++++ .../nodes/cuga_agent_core/tests/__init__.py | 0 .../tests/test_code_extraction.py | 99 +++ .../tests/test_execution_policy.py | 274 +++++++ .../tests/test_graph_adapter_hooks.py | 236 ++++++ .../cuga_agent_core/tests/test_graph_nodes.py | 336 +++++++++ .../tests/test_policy_enactment_adapter.py | 91 +++ .../tests/test_runtime_tools.py | 288 +++++++ .../tests/test_shared_call_model.py | 302 ++++++++ .../tests/test_shared_graph_builder.py | 150 ++++ .../tests/test_supervisor_feature_parity.py | 180 +++++ .../tests/test_supervisor_tool_provider.py | 153 ++++ .../tests/test_tool_approval_adapter.py | 170 +++++ .../tests/test_variable_bridge.py | 149 ++++ .../nodes/cuga_agent_core/variable_bridge.py | 59 ++ .../nodes/cuga_lite/agent_graph_adapter.py | 244 ++++++ .../nodes/cuga_lite/cuga_lite_graph.py | 705 +++--------------- .../cuga_lite/executors/code_executor.py | 17 +- .../tests/test_execution_plan_wiring.py | 132 ++++ .../tests/test_extract_codeblocks.py | 2 +- .../executors/tests/test_sync_async_tools.py | 2 +- ...test_cuga_lite_graph_evolve_integration.py | 4 +- .../tests/test_agent_graph_adapter.py | 294 ++++++++ .../test_cuga_lite_graph_evolve_guidelines.py | 2 +- .../nodes/cuga_lite/tool_approval_handler.py | 176 ++--- 
.../cuga_supervisor/cuga_supervisor_graph.py | 653 ++-------------- .../cuga_supervisor/cuga_supervisor_node.py | 51 +- .../supervisor_graph_adapter.py | 542 ++++++++++++++ .../nodes/cuga_supervisor/tests/__init__.py | 0 .../tests/test_supervisor_graph_adapter.py | 159 ++++ .../backend/cuga_graph/policy/enactment.py | 30 +- src/cuga/config.py | 6 + src/cuga/sdk.py | 16 + 42 files changed, 5161 insertions(+), 1763 deletions(-) create mode 100644 .claude/settings.json delete mode 100644 src/cuga/backend/cuga_graph/nodes/api/code_agent/code_act_agent.py delete mode 100644 src/cuga/backend/cuga_graph/nodes/api/code_agent/tests/test_code_act_agent.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/__init__.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/code_extraction.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution_policy.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph_nodes.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/runtime_tools.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_graph.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_nodes.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/__init__.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_code_extraction.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_execution_policy.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_adapter_hooks.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_nodes.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_policy_enactment_adapter.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_runtime_tools.py create mode 100644 
src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_call_model.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_graph_builder.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_feature_parity.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_tool_provider.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_tool_approval_adapter.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_variable_bridge.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/variable_bridge.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_execution_plan_wiring.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_agent_graph_adapter.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_supervisor/tests/__init__.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_supervisor/tests/test_supervisor_graph_adapter.py diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000..16f8e2f6 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,9 @@ +{ + "permissions": { + "allow": [ + "Bash(python -m pytest src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_runtime_tools.py::test_skill_tool_is_prompt_dict_compatible src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_runtime_tools.py::test_skill_tool_func_is_awaitable_via_make_tool_awaitable -v)", + "Bash(python -m pytest src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/ src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/ -q)", + "Bash(python -m *)" + ] + } +} diff --git 
a/src/cuga/backend/cuga_graph/nodes/api/code_agent/code_act_agent.py b/src/cuga/backend/cuga_graph/nodes/api/code_agent/code_act_agent.py deleted file mode 100644 index 84250a6a..00000000 --- a/src/cuga/backend/cuga_graph/nodes/api/code_agent/code_act_agent.py +++ /dev/null @@ -1,362 +0,0 @@ -# Copyright (c) 2025 LangChain -# Modifications Copyright 2025 CUGA -# Licensed under the MIT License - -import inspect -from typing import Any, Awaitable, Callable, Optional, Sequence, Type, TypeVar, Union - -from langchain_core.language_models import BaseChatModel -from langchain_core.tools import StructuredTool -from langchain_core.tools import tool as create_tool -from langchain_core.runnables import RunnableConfig -from langgraph.graph import END, START, MessagesState, StateGraph -from langgraph.types import Command -import json -from loguru import logger -from cuga.config import settings -import re - -from cuga.backend.activity_tracker.tracker import ActivityTracker, Step -from cuga.backend.llm.models import LLMManager - -tracker = ActivityTracker() -llm_manager = LLMManager() - - -EvalFunction = Callable[[str, dict[str, Any]], tuple[str, dict[str, Any]]] -EvalCoroutine = Callable[[str, dict[str, Any]], Awaitable[tuple[str, dict[str, Any]]]] - - -BACKTICK_PATTERN = r'```python(.*?)```' - - -def extract_and_combine_codeblocks(text: str) -> str: - """ - Extracts all codeblocks from a text string and combines them into a single code string. - - Args: - text: A string containing zero or more codeblocks, where each codeblock is - surrounded by triple backticks (```). - - Returns: - A string containing the combined code from all codeblocks, with each codeblock - separated by a newline. 
- - Example: - text = '''Here's some code: - - ```python - print('hello') - ``` - And more: - - ``` - print('world') - ```''' - - result = extract_and_combine_codeblocks(text) - - Result: - - print('hello') - - print('world') - """ - # Find all code blocks in the text using regex - # Pattern matches anything between triple backticks, with or without a language identifier - code_blocks = re.findall(BACKTICK_PATTERN, text, re.DOTALL) - - if code_blocks: - # Process each codeblock - processed_blocks = [] - for block in code_blocks: - # Strip leading and trailing whitespace - block = block.strip() - processed_blocks.append(block) - - # Combine all codeblocks with newlines between them - combined_code = "\n\n".join(processed_blocks) - - # Check if the combined code has print - if "print(" not in combined_code: - return "" - - return combined_code - - # No markdown blocks found, check if the text itself is valid Python code - stripped_text = text.strip() - - # Check if it has print - if "print(" not in stripped_text: - return "" - - try: - compile(stripped_text.replace('await ', ''), '', 'exec') - return stripped_text - except SyntaxError: - return "" - - -EvalFunction = Callable[[str, dict[str, Any]], tuple[str, dict[str, Any]]] -EvalCoroutine = Callable[[str, dict[str, Any]], Awaitable[tuple[str, dict[str, Any]]]] - - -async def check_if_asking_to_proceed(content: str) -> bool: - """ - Uses a simple LLM call to determine if the assistant is asking to proceed - without explicitly asking for parameter approval. - - Returns: - bool: True if asking to proceed (should auto-continue), False otherwise - """ - check_prompt = f"""Determine if the assistant is unnecessarily explaining what they're about to do instead of just doing it. - -Answer "yes" ONLY if the response matches these patterns (should auto-proceed): -- Explaining what they will do: "Let's start by reading...", "I will first read...", "Let me do that now..." 
-- Planning steps: "To do X, I will first do A, then B, then C" -- Announcing actions: "I'll read the contents of...", "I'm going to check..." - -Answer "no" if the response: -- Actually executes code (contains code blocks) -- Asks for specific parameter values: "Which email format do you prefer?" -- Asks for explicit approval: "Should I delete this?" -- Presents final answers/results: "Here are the 5 users..." -- Reports errors needing user input - -Examples that should return "yes" (auto-proceed): -1. "To determine which users from contacts.txt belong to the CRM system, I will first read the contents of the contacts.txt file and then retrieve the list of contacts from the CRM system. After that, I'll compare the two lists to identify the users that belong to the CRM system.\n\nLet's start by reading the contacts.txt file." -2. "To provide you with accurate information about CUGA, I'll read the contents of the file cuga_knowledge.md in the workspace. Let me do that now." -3. "I'll read the playbook file to understand the process." - -Examples that should return "no" (needs user interaction): -1. "Which email template would you like to use - formal or casual?" -2. "Here are the 5 matching contacts: John, Jane, Bob, Alice, Charlie." -3. "I need the account ID to proceed. Which account should I query?" -4. "Should I proceed with deleting these 10 records?" 
- -Assistant response to check: -{content} - -Your answer (yes/no):""" - - try: - checker_model = llm_manager.get_model(settings.agent.code.model) - response = await checker_model.ainvoke([{"role": "user", "content": check_prompt}]) - decision = response.content.strip().lower() - - logger.debug(f"Proceed check decision: {decision}") - return decision == "yes" - except Exception as e: - logger.warning(f"Error in proceed check: {e}") - return False - - -class CodeActState(MessagesState): - """State for CodeAct agent.""" - - script: Optional[str] - """The Python code script to be executed.""" - context: dict[str, Any] - """Dictionary containing the execution context with available tools and variables.""" - - -StateSchema = TypeVar("StateSchema", bound=CodeActState) -StateSchemaType = Type[StateSchema] - - -def _get_tool_args_json_schema(tool: StructuredTool) -> Optional[dict[str, Any]]: - """Return a JSON schema dict for a tool's arguments, when available.""" - args_schema = getattr(tool, "args_schema", None) - if args_schema is None: - return None - - if isinstance(args_schema, dict): - return args_schema - - model_json_schema = getattr(args_schema, "model_json_schema", None) - if callable(model_json_schema): - return model_json_schema() - - schema = getattr(args_schema, "schema", None) - if callable(schema): - return schema() - - return None - - -def create_default_prompt(tools: list[StructuredTool], base_prompt: Optional[str] = None): - """Create default prompt for the CodeAct agent.""" - tools = [t if isinstance(t, StructuredTool) else create_tool(t) for t in tools] - prompt = f"{base_prompt}\n\n" if base_prompt else "" - prompt += """You will be given a task to perform. You should output either -- a Python code snippet that provides the solution to the task, or a step towards the solution. Any output you want to extract from the code should be printed to the console. Code should be output in a fenced code block. 
-- text to be shown directly to the user, if you want to ask for more information or provide the final answer. - -In addition to the Python Standard Library, you can use the following functions: -""" - - for tool in tools: - schema = _get_tool_args_json_schema(tool) - if schema: - schema_str = json.dumps(schema, indent=2) - prompt += f"""The arguments for `{tool.name}` should follow this JSON schema: -```json -{schema_str} -``` -""" - prompt += f''' -def {tool.name}{str(inspect.signature(tool.func))}: - """{tool.description}""" - ... -''' - - prompt += """ - -Variables defined at the top level of previous code snippets can be referenced in your code. - -Reminder: use Python code snippets to call tools""" - return prompt - - -def create_codeact( - model: BaseChatModel, - tools: Sequence[Union[StructuredTool, Callable]], - eval_fn: Union[EvalFunction, EvalCoroutine], - *, - prompt: Optional[str] = None, - state_schema: StateSchemaType = CodeActState, -) -> StateGraph: - """Create a CodeAct agent. - - Args: - model: The language model to use for generating code - tools: List of tools available to the agent. Can be passed as python functions or StructuredTool instances. - eval_fn: Function or coroutine that executes code in a sandbox. Takes code string and locals dict, - returns a tuple of (stdout output, new variables dict) - prompt: Optional custom system prompt. If None, uses default prompt. - To customize default prompt you can use `create_default_prompt` helper: - `create_default_prompt(tools, "You are a helpful assistant.")` - state_schema: The state schema to use for the agent. 
- - Returns: - A StateGraph implementing the CodeAct architecture - """ - tools = [t if isinstance(t, StructuredTool) else create_tool(t) for t in tools] - - if prompt is None: - prompt = create_default_prompt(tools) - - # Make tools available to the code sandbox - tools_context = {tool.name: tool.func for tool in tools} - - async def call_model(state: StateSchema) -> Command: - messages = [{"role": "system", "content": prompt}] + state["messages"] - # Disable tool calling by binding no tools - model_without_tools = model - response = await model_without_tools.ainvoke(messages) - # Extract and combine all code blocks - content = response.content - reasoning_content = response.additional_kwargs.get('reasoning_content') - tracker.collect_step(step=Step(name="Raw_Assistant_Response", data=content)) - if not content or (reasoning_content and '```python' in reasoning_content): - content = reasoning_content or content - code = extract_and_combine_codeblocks(content) - if code: - tracker.collect_step(step=Step(name="Assistant_code", data=content)) - logger.debug( - f"\n{'=' * 50} ASSISTANT CODE {'=' * 50}\n{code}\n{'=' * 50} END ASSISTANT CODE {'=' * 50}" - ) - return Command(goto="sandbox", update={"messages": [response], "script": code}) - else: - # No code block found - check if asking to proceed - tracker.collect_step(step=Step(name="Assistant_nl", data=content)) - planning_response = response.content - - # should_auto_proceed = await check_if_asking_to_proceed(planning_response) - # Removed dead code: if False and should_auto_proceed check - - return Command( - update={"messages": [{"role": "assistant", "content": planning_response}], "script": None} - ) - - # If eval_fn is a async, we define async node function. 
- if inspect.iscoroutinefunction(eval_fn): - - async def sandbox(state: StateSchema, config: Optional[RunnableConfig] = None): - existing_context = state.get("context", {}) - context = {**existing_context, **tools_context} - # Execute the script in the sandbox - # Pass config to eval_fn if it accepts it - eval_fn_sig = inspect.signature(eval_fn) - if 'config' in eval_fn_sig.parameters: - output, new_vars = await eval_fn(state["script"], context, config=config) - else: - output, new_vars = await eval_fn(state["script"], context) - tracker.collect_step(step=Step(name="User_output", data=output)) - tracker.collect_step( - step=Step( - name="User_output_variables", - data=json.dumps( - new_vars, - default=lambda o: o.model_dump() if hasattr(o, "model_dump") else str(o), - ), - ) - ) - - # 📝 Code Execution Result - logger.debug( - f"\n\n------\n\n📝 Execution output:\n\n {output.strip()[: settings.advanced_features.execution_output_max_length]}{'...' if len(output.strip()) > settings.advanced_features.execution_output_max_length else ''} \n\n------\n\n" - ) - - # ────────────────────────────────────────────────────────────── - # 🔄 Context Update: Merging execution results with existing context - # ────────────────────────────────────────────────────────────── - - new_context = {**existing_context, **new_vars} - - # ────────────────────────────────────────────────────────────── - # 📤 Return: Formatting execution results for model consumption - # ────────────────────────────────────────────────────────────── - - # Return execution output as a user message so the model sees it - tracker.collect_step( - step=Step( - name="User_return", - data=f"Execution output:\n{output.strip()[: settings.advanced_features.execution_output_max_length]}{'...' 
if len(output.strip()) > settings.advanced_features.execution_output_max_length else ''}", - ) - ) - - return { - "messages": [ - { - "role": "user", - "content": f"Execution output:\n{output.strip()[: settings.advanced_features.execution_output_max_length]}{'...' if len(output.strip()) > settings.advanced_features.execution_output_max_length else ''}", - } - ], - "context": new_context, - } - else: - - def sandbox(state: StateSchema, config: Optional[RunnableConfig] = None): - existing_context = state.get("context", {}) - context = {**existing_context, **tools_context} - # Execute the script in the sandbox - # Pass config to eval_fn if it accepts it - eval_fn_sig = inspect.signature(eval_fn) - if 'config' in eval_fn_sig.parameters: - output, new_vars = eval_fn(state["script"], context, config=config) - else: - output, new_vars = eval_fn(state["script"], context) - new_context = {**existing_context, **new_vars} - # Return execution output as a user message so the model sees it - return { - "messages": [{"role": "user", "content": f"Execution output:\n{output}"}], - "context": new_context, - } - - agent = StateGraph(state_schema) - agent.add_node(call_model, destinations=(END, "sandbox")) - agent.add_node(sandbox) - agent.add_edge(START, "call_model") - agent.add_edge("sandbox", "call_model") - return agent diff --git a/src/cuga/backend/cuga_graph/nodes/api/code_agent/tests/test_code_act_agent.py b/src/cuga/backend/cuga_graph/nodes/api/code_agent/tests/test_code_act_agent.py deleted file mode 100644 index ab6f8b0d..00000000 --- a/src/cuga/backend/cuga_graph/nodes/api/code_agent/tests/test_code_act_agent.py +++ /dev/null @@ -1,58 +0,0 @@ -from typing import Any, Dict - -from langchain_core.tools import StructuredTool -from pydantic import Field, create_model - -from cuga.backend.cuga_graph.nodes.api.code_agent.code_act_agent import create_default_prompt - - -def create_test_tool_with_nested_schema(): - """Creates a sample tool with a nested object schema for 
testing.""" - - WhenModel = create_model( - "WhenModel", - start=(str, Field(..., description="Start time in ISO 8601 format")), - end=(str, Field(..., description="End time in ISO 8601 format")), - ) - - EventModel = create_model( - "EventModel", - title=(str, Field(..., description="Title of the event")), - when=(WhenModel, Field(..., description="Timing of the event")), - ) - - InputModel = create_model( - "create_eventInput", - event=(EventModel, Field(..., description="The event to create")), - ) - - def create_event(event: Dict[str, Any]) -> str: - """ - Creates a new calendar event. - """ - return f"Event '{event.get('title')}' created." - - tool = StructuredTool.from_function( - func=create_event, - name="create_event", - description="Creates a new calendar event.", - args_schema=InputModel, - ) - return tool - - -def test_create_default_prompt_includes_json_schema_for_nested_models(): - """Test that create_default_prompt includes nested model schema details.""" - tool = create_test_tool_with_nested_schema() - prompt = create_default_prompt(tools=[tool]) - - assert "The arguments for `create_event` should follow this JSON schema:" in prompt - assert "```json" in prompt - assert "EventModel" in prompt - assert "WhenModel" in prompt - assert '"event"' in prompt - assert '"title"' in prompt - assert '"when"' in prompt - assert '"start"' in prompt - assert '"end"' in prompt - assert "def create_event(event: Dict[str, Any]) -> str:" in prompt diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/code_extraction.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/code_extraction.py new file mode 100644 index 00000000..88be6caa --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/code_extraction.py @@ -0,0 +1,89 @@ +"""Shared fenced-code extraction and 
tool awaitable-wrapping utilities. + +Previously duplicated (and drifted) across ``cuga_lite_graph`` and +``cuga_supervisor_graph``. The canonical behavior here is Cuga Lite's: +fenced ```python blocks are returned even without a +``print(`` call (the ``print(`` gate applies only to the no-fence raw-text +fallback), and ``make_tool_awaitable`` always normalizes Pydantic results +and always returns a coroutine function. +""" + +from __future__ import annotations + +import asyncio +import inspect +import re +from typing import Any, Callable, Optional + +from pydantic import BaseModel + +BACKTICK_PATTERN = r'```python(.*?)```' + + +def extract_and_combine_codeblocks(text: str) -> str: + """Extract all ```python codeblocks from text and combine them. + + If fenced blocks are present they are returned joined by a blank line + with no further filtering. Otherwise the raw text is treated as code + only if it contains ``print(`` and compiles (``await`` is stripped for + the compile check only; the original text is returned). + """ + code_blocks = re.findall(BACKTICK_PATTERN, text, re.DOTALL) + + if code_blocks: + return "\n\n".join(block.strip() for block in code_blocks) + + stripped_text = text.strip() + + if "print(" not in stripped_text: + return "" + + try: + compile(stripped_text.replace('await ', ''), '', 'exec') + return stripped_text + except SyntaxError: + return "" + + +def extract_code_from_model_response(content: Optional[str], reasoning_content: Optional[str]) -> str: + """Extract code from a model response, falling back to reasoning. + + Tries fenced/raw code in ``content`` first; only if that yields nothing + does it look at ``reasoning_content``. Mirrors the (previously + duplicated) logic in the Lite and Supervisor loop nodes. 
+ """ + code = extract_and_combine_codeblocks(content) if content else "" + if not code and reasoning_content: + code = extract_and_combine_codeblocks(reasoning_content) + return code + + +def make_tool_awaitable(func: Callable[..., Any]) -> Callable[..., Any]: + """Wrap a tool function so it is always awaitable. + + Sync functions are run in the default executor; async functions are + wrapped (never returned as-is). In both cases a Pydantic ``BaseModel`` + return value is converted to a dict via ``.model_dump()``. + """ + + async def wrapper_with_pydantic(*args: Any, **kwargs: Any) -> Any: + result = await func(*args, **kwargs) if inspect.iscoroutinefunction(func) else func(*args, **kwargs) + + if isinstance(result, BaseModel): + return result.model_dump() + + return result + + if inspect.iscoroutinefunction(func): + return wrapper_with_pydantic + + async def async_wrapper(*args: Any, **kwargs: Any) -> Any: + loop = asyncio.get_event_loop() + result = await loop.run_in_executor(None, lambda: func(*args, **kwargs)) + + if isinstance(result, BaseModel): + return result.model_dump() + + return result + + return async_wrapper diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution_policy.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution_policy.py new file mode 100644 index 00000000..224eb330 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution_policy.py @@ -0,0 +1,141 @@ +"""Three-backend execution routing. + +``ExecutionRouter.resolve`` turns the current (legacy) ``advanced_features`` +settings into an explicit :class:`ExecutionPlan` with three independent +axes: ``python_backend``, ``shell_backend`` and ``filesystem_backend``. + +This is intentionally *behavior-preserving*: ``python_backend`` reproduces +exactly the implicit decision today in +``CodeExecutor.eval_with_tools_async`` ("e2b" iff +``advanced_features.e2b_sandbox``, else "local"). 
The shell/filesystem axes +merely describe existing flag behavior; no graph consumes the plan yet. +""" + +from __future__ import annotations + +from typing import Any, List, Literal, Optional + +from pydantic import BaseModel, Field + +PythonBackend = Literal["local", "e2b"] +ShellBackend = Literal["none", "local", "native", "opensandbox", "e2b"] +FilesystemBackend = Literal["none", "host", "sandbox_remote"] + + +class ExecutionPlan(BaseModel): + """Explicit, prompt-/log-visible description of where execution happens.""" + + requested_backend: str + python_backend: PythonBackend + shell_backend: ShellBackend + filesystem_backend: FilesystemBackend + local_control_tools: List[str] = Field(default_factory=lambda: ["find_tools", "load_skill"]) + fallbacks: List[str] = Field(default_factory=list) + workspace_root: str = "/workspace" + variable_transfer_policy: str = "namespace" + + @property + def split_execution_active(self) -> bool: + """True when generated Python runs locally while shell/FS run remotely.""" + remote_shell = self.shell_backend in ("native", "opensandbox", "e2b") + remote_fs = self.filesystem_backend == "sandbox_remote" + return self.python_backend == "local" and (remote_shell or remote_fs) + + +def split_execution_note(plan: "ExecutionPlan") -> str: + """Return a prompt-visible note when Python and shell/FS run in different environments. + + Returns an empty string when split execution is not active so callers can + safely append it without extra conditionals. + """ + if not plan.split_execution_active: + return "" + return ( + "**Split-execution mode is active**: your Python code runs in the local environment, " + "but shell commands (`run_command`) and filesystem operations (`read_file`, `write_file`, " + "`list_files`, etc.) execute inside the remote sandbox. " + "Use `await run_command(...)` for anything that must happen inside the sandbox; " + "avoid `os.path`, `open()`, or other local-filesystem calls when targeting sandbox paths." 
+ ) + + +class ExecutionRouter: + """Resolves settings (+ optional explicit overrides) into an ExecutionPlan.""" + + @staticmethod + def resolve( + settings: Any, + *, + mode: Optional[PythonBackend] = None, + workspace_root: Optional[str] = None, + ) -> ExecutionPlan: + adv = settings.advanced_features + exec_cfg = getattr(settings, "execution", None) + fallbacks: List[str] = [] + + # Phase 6: explicit execution.* settings take priority over advanced_features. + # None means "not set" — fall through to the legacy flag logic below. + explicit_python: Optional[PythonBackend] = getattr(exec_cfg, "python_backend", None) + explicit_shell: Optional[ShellBackend] = getattr(exec_cfg, "shell_backend", None) + explicit_fs: Optional[FilesystemBackend] = getattr(exec_cfg, "filesystem_backend", None) + explicit_root: Optional[str] = getattr(exec_cfg, "workspace_root", None) + + # ── python_backend ────────────────────────────────────────────────── + e2b = bool(getattr(adv, "e2b_sandbox", False)) + settings_choice: PythonBackend = ( + explicit_python if explicit_python is not None else ("e2b" if e2b else "local") + ) + if mode is not None: + python_backend: PythonBackend = mode + if mode != settings_choice: + fallbacks.append( + f"python_backend forced to '{mode}' by explicit mode " + f"(settings would select '{settings_choice}')" + ) + else: + python_backend = settings_choice + + # ── legacy deprecation: warn when execution.* overrides disagree with advanced_features ── + if explicit_python is not None and e2b and explicit_python != "e2b": + fallbacks.append( + "advanced_features.e2b_sandbox=True is deprecated when execution.python_backend " + f"is set; e2b_sandbox is ignored (execution.python_backend='{explicit_python}' wins). " + "Remove advanced_features.e2b_sandbox from your settings." 
+ ) + + # ── shell_backend ─────────────────────────────────────────────────── + _legacy_shell_on = bool(getattr(adv, "enable_shell_tool", False)) + if explicit_shell is not None: + shell_backend: ShellBackend = explicit_shell + if _legacy_shell_on: + fallbacks.append( + "advanced_features.enable_shell_tool=True is deprecated when execution.shell_backend " + f"is set; enable_shell_tool is ignored (execution.shell_backend='{explicit_shell}' wins). " + "Remove advanced_features.enable_shell_tool from your settings." + ) + elif _legacy_shell_on: + sandbox_mode = str(getattr(adv, "sandbox_mode", "native") or "native") + shell_backend = ( + sandbox_mode if sandbox_mode in ("local", "native", "opensandbox", "e2b") else "native" + ) + else: + shell_backend = "none" + + # ── filesystem_backend ────────────────────────────────────────────── + if explicit_fs is not None: + filesystem_backend: FilesystemBackend = explicit_fs + elif bool(getattr(adv, "enable_filesystem_tools", False)): + filesystem_backend = "sandbox_remote" if shell_backend == "opensandbox" else "host" + else: + filesystem_backend = "none" + + requested_backend = mode if mode is not None else python_backend + + return ExecutionPlan( + requested_backend=requested_backend, + python_backend=python_backend, + shell_backend=shell_backend, + filesystem_backend=filesystem_backend, + fallbacks=fallbacks, + workspace_root=workspace_root or explicit_root or "/workspace", + ) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph_nodes.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph_nodes.py new file mode 100644 index 00000000..58489b5a --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph_nodes.py @@ -0,0 +1,280 @@ +"""Shared agent-loop helpers and CoreGraphAdapter ABC. + +``append_chat_messages_with_step_limit`` and ``create_error_command`` were +duplicated verbatim (modulo a couple of graph-specific bits) in +``cuga_lite_graph`` and ``cuga_supervisor_graph``. 
class CoreGraphAdapter(ABC):
    """Per-graph seam consumed by the shared agent-loop helpers.

    A concrete adapter must implement the two abstract accessors (message
    list and step limit). Every other method is an optional hook used by
    the shared ``call_model`` node; each default is a pass-through, an
    empty value, or a no-op, so a subclass overrides only what it needs.
    """

    #: Name of the state field the shared helpers write the message list to.
    #: Declared without a value: concrete adapters are expected to set it.
    messages_key: str

    #: Approval seams. The values below are the Lite defaults.
    metadata_key: str = "cuga_lite_metadata"
    execute_node_name: str = "sandbox"
    sender_name: str = "CugaLite"

    # ── Required accessors ─────────────────────────────────────────────────

    @abstractmethod
    def get_messages(self, state: Any) -> List[BaseMessage]:
        """Return the message list new messages are appended to.

        Implementations are expected to hand back a list even when the
        state holds no messages yet (the shared helpers concatenate onto it).
        """

    @abstractmethod
    def resolve_max_steps(self, state: Any, override: Optional[int]) -> int:
        """Return this graph's step limit; an explicit ``override`` wins."""

    # ── State accessors with Lite defaults ─────────────────────────────────

    def get_variable_manager(self, state: Any) -> Any:
        """Return the variable manager that execution variables are recorded in.

        The default reads ``state.variables_manager`` and yields ``None``
        when the attribute is absent. Graphs that keep their manager under a
        different attribute (the original note names Supervisor's
        ``supervisor_variables_manager``) are expected to override this.
        """
        return getattr(state, "variables_manager", None)

    def get_metadata(self, state: Any) -> dict:
        """Return the approval/policy metadata stored under ``metadata_key``.

        A missing or falsy value is replaced by a fresh empty dict.
        """
        stored = getattr(state, self.metadata_key, None)
        return stored if stored else {}

    def set_metadata(self, state: Any, metadata: dict) -> None:
        """Store ``metadata`` on ``state`` under ``metadata_key``."""
        setattr(state, self.metadata_key, metadata)

    # ── Optional call_model hooks: before the model call ───────────────────

    def get_few_shot_messages(self, state: Any) -> List[BaseMessage]:
        """Return few-shot examples to place ahead of the conversation.

        Default: no examples.
        """
        return []

    def get_pi(self, state: Any) -> Optional[str]:
        """Return the Personal Instructions text to inject, or ``None`` to skip.

        Default: ``None``.
        """
        return None

    def prepare_system_content(self, state: Any, configurable: dict, base_prompt: str) -> str:
        """Return the system prompt to send, optionally augmented.

        Default: ``base_prompt`` unchanged.
        """
        return base_prompt

    def get_variables_storage(self, state: Any) -> Optional[Any]:
        """Return the variables storage handed to context summarisation.

        Default: ``state.variables_storage``, or ``None`` when absent.
        """
        return getattr(state, "variables_storage", None)

    def get_tracker(self) -> Optional[Any]:
        """Return the activity tracker for context summarisation.

        Default: ``None`` (no tracker).
        """
        return None

    def get_invoke_config(self, configurable: dict) -> dict:
        """Return the ``config`` mapping forwarded to the model invocation.

        Default: an empty dict.
        """
        return {}

    async def resolve_bind_tools(self, state: Any, active_model: Any, configurable: dict) -> Any:
        """Return a tool-bound model, or ``None`` to use the plain model.

        Default: ``None``.
        """
        return None

    async def ainvoke_model(self, bound: Any, messages: list, invoke_config: dict) -> Any:
        """Invoke ``bound`` with ``messages`` and return the raw response.

        The default simply awaits ``bound.ainvoke``; a subclass may wrap the
        call to recover from provider-specific failures.
        """
        response = await bound.ainvoke(messages, config=invoke_config)
        return response

    # ── Optional call_model hooks: after the model call ────────────────────

    def normalize_response(self, response: Any) -> Tuple[str, Optional[str]]:
        """Split a model response into ``(content, reasoning)``.

        ``content`` is ``response.content`` (empty string when falsy);
        ``reasoning`` is the ``reasoning_content`` entry of
        ``additional_kwargs`` when present, else ``None``.
        """
        text = response.content
        if not text:
            text = ""
        extras = getattr(response, "additional_kwargs", None)
        if not extras:
            extras = {}
        return text, extras.get("reasoning_content")

    def on_response_processed(self, state: Any, code: Optional[str], content: str) -> None:
        """Side-effect hook invoked once code extraction has run.

        Default: does nothing.
        """

    def build_metadata_update(self, state: Any, *, playbook_fired: bool) -> dict:
        """Return the new value for the metadata state key.

        Only the value is returned, not the key/value pair. When a playbook
        fired, the current metadata is copied with
        ``playbook_guidance_added`` set to ``True``; otherwise the current
        metadata is returned as-is.
        """
        current = self.get_metadata(state)
        if not playbook_fired:
            return current
        return {**current, "playbook_guidance_added": True}

    async def classify_auto_continue(
        self, state: Any, model: Any, content: str, reasoning: Optional[str]
    ) -> bool:
        """Return ``True`` when a text-only reply should loop back automatically.

        Default: ``False`` (never auto-continue).
        """
        return False
def _step_limit_error(limit: int, new_step_count: int) -> AIMessage:
    """Log a step-limit breach and build the canned error message for it.

    Both step guards below call this, so the user-facing text and the
    warning log line are defined exactly once and cannot drift apart.
    """
    error_msg = (
        f"Maximum step limit ({limit}) reached. "
        f"The task has exceeded the allowed number of execution cycles. "
        f"Please simplify your request or break it into smaller tasks."
    )
    logger.warning(f"Step limit reached: {new_step_count} > {limit}")
    return AIMessage(content=error_msg)


def append_chat_messages_with_step_limit(
    adapter: CoreGraphAdapter,
    state: Any,
    new_messages: List[BaseMessage],
    max_steps: Optional[int] = None,
) -> Tuple[List[BaseMessage], Optional[AIMessage]]:
    """Append messages, counting one step and enforcing the step limit.

    Args:
        adapter: Supplies the base message list and the resolved limit.
        state: Graph state; ``state.step_count`` is read, never written.
        new_messages: Messages to append after the adapter's base list.
        max_steps: Optional override forwarded to ``resolve_max_steps``.

    Returns:
        ``(updated_messages, error_message_or_None)``. When the next step
        would exceed the limit, the error ``AIMessage`` is appended to the
        returned list and also returned as the second element.
    """
    limit = adapter.resolve_max_steps(state, max_steps)
    new_step_count = state.step_count + 1
    base = adapter.get_messages(state)

    if new_step_count > limit:
        error_ai_message = _step_limit_error(limit, new_step_count)
        return base + new_messages + [error_ai_message], error_ai_message

    logger.debug(f"Step count: {new_step_count}/{limit}")
    return base + new_messages, None


def execution_output_text(output: str) -> str:
    """Return the execution-feedback message body for ``output``."""
    return f"Execution output:\n{output}"


def enforce_step_limit(
    adapter: CoreGraphAdapter,
    *,
    state: Any,
    messages: List[BaseMessage],
    new_step_count: int,
    limit: int,
) -> Optional[Command]:
    """Step guard for callers that already built their message list.

    Unlike :func:`append_chat_messages_with_step_limit`, nothing is
    re-read from the adapter: ``messages``, ``new_step_count`` and
    ``limit`` are all supplied by the caller.

    Returns:
        An error ``Command`` routed to ``END`` (with the canned error
        appended to ``messages``) when ``new_step_count`` exceeds
        ``limit``; otherwise ``None`` so the caller continues routing.
    """
    if new_step_count > limit:
        error_ai_message = _step_limit_error(limit, new_step_count)
        # create_error_command adds 1 to the step count it is given, so the
        # pre-increment state value is passed here.
        return create_error_command(
            adapter, messages + [error_ai_message], error_ai_message, state.step_count
        )
    return None


def inject_playbook_guidance(
    messages: List[BaseMessage],
    metadata: Optional[dict],
) -> List[BaseMessage]:
    """Append playbook guidance to the last human message, once.

    Injection happens only when ``metadata`` reports a matched policy of
    type ``"playbook"``, guidance text is present, and
    ``playbook_guidance_added`` is not yet set. In every other case the
    original list object is returned untouched.

    Returns:
        The original ``messages`` when nothing is injected; otherwise a
        new list in which the last human message is replaced by a
        ``HumanMessage`` carrying the extra ``## Task Guidance`` section.
        The caller's list is never mutated.
    """
    if not metadata:
        return messages
    if not metadata.get("policy_matched"):
        return messages
    if metadata.get("policy_type") != "playbook":
        return messages
    if metadata.get("playbook_guidance_added"):
        return messages
    guidance = metadata.get("playbook_guidance")
    if not guidance:
        return messages

    # Scan backwards for the most recent human/user message.
    last_human_idx: Optional[int] = None
    for i in range(len(messages) - 1, -1, -1):
        msg = messages[i]
        if isinstance(msg, HumanMessage) or getattr(msg, "type", None) in ("human", "user"):
            last_human_idx = i
            break

    if last_human_idx is None:
        return messages

    result = list(messages)
    original = result[last_human_idx]
    result[last_human_idx] = HumanMessage(content=f"{original.content}\n\n## Task Guidance\n{guidance}")
    return result


def create_error_command(
    adapter: CoreGraphAdapter,
    updated_messages: List[BaseMessage],
    error_message: AIMessage,
    step_count: int,
    additional_updates: Optional[Dict[str, Any]] = None,
) -> Command:
    """Build a ``Command`` that routes to ``END`` with the error state set.

    Args:
        adapter: Provides ``messages_key``, the state key for the messages.
        updated_messages: Full message list to write back.
        error_message: Its ``content`` becomes both ``final_answer`` and
            ``error``.
        step_count: Step count *before* this step; 1 is added here.
        additional_updates: Extra state updates merged last, so they can
            override any of the defaults above.
    """
    updates: Dict[str, Any] = {
        adapter.messages_key: updated_messages,
        "script": None,
        "final_answer": error_message.content,
        "execution_complete": True,
        "error": error_message.content,
        "step_count": step_count + 1,
    }
    if additional_updates:
        updates.update(additional_updates)

    return Command(goto=END, update=updates)
FilesystemChoice = Literal["none", "host", "sandbox_remote"]
ShellChoice = Literal["none", "local", "native", "opensandbox"]


@dataclass(frozen=True)
class RuntimeBackends:
    """Immutable injection decision: one filesystem choice, one shell choice."""

    filesystem: FilesystemChoice
    shell: ShellChoice


@dataclass
class ToolBundle:
    """Runtime tools grouped by consumer.

    ``prompt_tools`` are the tool objects listed for the model,
    ``execution_callables`` maps tool name to the callable placed in the
    execution namespace, and ``app_definitions`` carries app metadata.
    """

    prompt_tools: List[Any] = field(default_factory=list)
    execution_callables: Dict[str, Any] = field(default_factory=dict)
    app_definitions: List[AppDefinition] = field(default_factory=list)


def prompt_tool_dicts(tools: List[Any]) -> List[Dict[str, Any]]:
    """Convert runtime tools into plain dicts for a prompt template.

    Each dict has the keys ``name``, ``description``, ``params_str``,
    ``params_doc`` and ``response_doc``. Parameter names come from the
    tool's ``args`` mapping; if reading it fails the tool is rendered as
    taking no parameters (deliberate best-effort).
    """
    rendered: List[Dict[str, Any]] = []
    for tool in tools:
        try:
            param_names = list(getattr(tool, "args", {}) or {})
        except Exception:
            param_names = []

        if param_names:
            params_doc = "\n".join(f"- {n}" for n in param_names)
        else:
            params_doc = "No parameters required"

        entry: Dict[str, Any] = {
            "name": tool.name,
            "description": tool.description or "",
            "params_str": ", ".join(param_names),
            "params_doc": params_doc,
            "response_doc": "",
        }
        rendered.append(entry)
    return rendered


def resolve_runtime_backends(settings: Any, configurable: Dict[str, Any]) -> RuntimeBackends:
    """Resolve which filesystem and shell backends to inject.

    Reads ``settings.advanced_features``; an ``enable_filesystem_tools``
    key in ``configurable`` takes precedence over the settings flag.

    The shell is active only when ``enable_shell_tool`` is on and the
    sandbox mode is ``native``, ``local``, or ``opensandbox`` with
    ``opensandbox_sandbox`` enabled. The filesystem is ``sandbox_remote``
    only when the shell is active in ``opensandbox`` mode, ``host`` when
    merely enabled, and ``none`` otherwise.
    """
    adv = settings.advanced_features
    mode = getattr(adv, "sandbox_mode", "opensandbox")
    shell_enabled = getattr(adv, "enable_shell_tool", False)

    if "enable_filesystem_tools" in configurable:
        fs_enabled = configurable["enable_filesystem_tools"]
    else:
        fs_enabled = getattr(adv, "enable_filesystem_tools", False)

    opensandbox_enabled = getattr(adv, "opensandbox_sandbox", False)
    shell_active = shell_enabled and (
        mode in ("native", "local") or (mode == "opensandbox" and opensandbox_enabled)
    )

    filesystem: FilesystemChoice = (
        "none"
        if not fs_enabled
        else ("sandbox_remote" if shell_active and mode == "opensandbox" else "host")
    )
    shell: ShellChoice = mode if shell_active else "none"
    return RuntimeBackends(filesystem=filesystem, shell=shell)


def _register_tools(bundle: ToolBundle, tools: List[Any]) -> None:
    """Expose each tool's ``coroutine or func`` by name, then list the tools."""
    for tool in tools:
        fn = tool.coroutine or tool.func
        if fn:
            bundle.execution_callables[tool.name] = fn
    bundle.prompt_tools.extend(tools)


def build_runtime_tools(*, thread_id: Optional[str], backends: RuntimeBackends) -> ToolBundle:
    """Create the filesystem and shell tools selected by ``backends``.

    Tool creation is delegated to the existing executor packages, which
    are imported lazily and only for the axes that are enabled.

    Returns:
        A ``ToolBundle`` holding the created tools, their execution
        callables, and (for filesystem tools) a ``filesystem`` app entry.
    """
    bundle = ToolBundle()

    if backends.filesystem != "none":
        import cuga.backend.cuga_graph.nodes.cuga_lite.executors.filesystem as fs_pkg

        # ``None`` lets create_filesystem_tools pick its own default backend.
        fs_backend = None
        if backends.filesystem == "sandbox_remote":
            from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor

            fs_backend = fs_pkg.RemoteSandboxBackend(CodeExecutor._get_opensandbox_executor(), thread_id)

        fs_tools = fs_pkg.create_filesystem_tools(thread_id, backend=fs_backend)
        _register_tools(bundle, fs_tools)
        bundle.app_definitions.append(
            AppDefinition(
                name="filesystem",
                type="runtime",
                description="Workspace filesystem tools: read, write, edit, list, search, move files and directories.",
            )
        )
        logger.info(f"Injected filesystem tools (thread_id={thread_id!r}): {[t.name for t in fs_tools]}")

    if backends.shell != "none":
        from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor

        # Any shell value other than native/local falls through to OpenSandbox.
        shell_table = {
            "native": ("_get_native_executor", "NativeSandbox"),
            "local": ("_get_local_sandbox_executor", "LocalSandbox"),
        }
        getter_name, sandbox_label = shell_table.get(
            backends.shell, ("_get_opensandbox_executor", "OpenSandbox")
        )
        sandbox_executor = getattr(CodeExecutor, getter_name)()

        run_cmd_tools = sandbox_executor.create_sandbox_tools(thread_id=thread_id)
        _register_tools(bundle, run_cmd_tools)
        logger.info(f"[{sandbox_label}] Injected run_command (thread_id={thread_id!r})")

    return bundle
def build_agent_graph(
    *,
    adapter: CoreGraphAdapter,
    state_class: Type,
    prepare_node: Callable,
    call_model_node: Callable,
    execute_node: Callable,
) -> StateGraph:
    """Assemble the three-node agent graph and return it uncompiled.

    Three nodes are registered: ``prepare``, ``call_model`` and a third
    node named by ``adapter.execute_node_name``. Only two static edges are
    added here (``START`` to ``prepare``, ``prepare`` to ``call_model``);
    no edge out of ``call_model`` or the execute node is declared in this
    function.

    Args:
        adapter: Graph-specific seam; only ``execute_node_name`` is read.
        state_class: State schema passed to ``StateGraph``.
        prepare_node: Node callable for the prepare step.
        call_model_node: Node callable for the model step.
        execute_node: Node callable for the execute step.

    Returns:
        The uncompiled ``StateGraph``; the caller compiles it and supplies
        its own checkpointer.
    """
    builder = StateGraph(state_class)

    nodes = (
        ("prepare", prepare_node),
        ("call_model", call_model_node),
        (adapter.execute_node_name, execute_node),
    )
    for node_name, node_fn in nodes:
        builder.add_node(node_name, node_fn)

    for source, target in ((START, "prepare"), ("prepare", "call_model")):
        builder.add_edge(source, target)

    return builder
def create_call_model_node(
    adapter: CoreGraphAdapter,
    base_model: Any,
    settings: Any,
) -> Callable:
    """Return an async ``call_model`` node function parameterised by *adapter*.

    Args:
        adapter: Graph-specific seam providing the hook implementations.
        base_model: Default LLM; replaced at runtime when
            ``config["configurable"]["llm"]`` is set.
        settings: Application settings; ``settings.policy.enabled`` gates
            the tool-approval and playbook-guidance paths.

    Returns:
        ``call_model(state, config=None)``, a coroutine function that
        returns a ``Command`` routing to the execute node, back to
        ``call_model``, or to ``END``.
    """

    async def call_model(state: Any, config: Any = None) -> Command:
        """Run one model turn and decide where the graph goes next."""
        configurable: dict = config.get("configurable", {}) if config else {}

        # ── Tool-approval HITL resumption ──────────────────────────────────
        if settings.policy.enabled and ToolApprovalHandler.is_returning_from_approval(adapter, state):
            return ToolApprovalHandler.handle_approval_resumption(adapter, state)

        # ── Resolve active model ───────────────────────────────────────────
        active_model = configurable.get("llm") or base_model

        # ── System content (may be augmented by the adapter) ───────────────
        base_prompt: str = getattr(state, "prepared_prompt", "") or ""
        system_content: str = adapter.prepare_system_content(state, configurable, base_prompt)

        # ── Context summarisation ──────────────────────────────────────────
        # Summarisation is given the un-augmented base prompt, not
        # ``system_content``.
        effective_messages = await apply_context_summarization(
            adapter.get_messages(state) or [],
            active_model,
            system_prompt=base_prompt,
            tools=None,
            tracker=adapter.get_tracker(),
            variables_storage=adapter.get_variables_storage(state),
            variable_counter_state=getattr(state, "variable_counter_state", None),
            variable_creation_order=getattr(state, "variable_creation_order", None),
            message_list_name=adapter.messages_key,
        )

        # ── Build messages_for_model: [system] + few-shot + conversation ───
        messages_for_model: list = [{"role": "system", "content": system_content}]

        # Only dict examples with a user/assistant role and non-empty content
        # are forwarded; anything else is ignored.
        for example in adapter.get_few_shot_messages(state):
            if isinstance(example, dict):
                role = (example.get("role") or "").strip().lower()
                ex_content = example.get("content") or ""
                if role in {"user", "assistant"} and ex_content:
                    messages_for_model.append({"role": role, "content": ex_content})

        # ── Variables summary ──────────────────────────────────────────────
        var_manager = adapter.get_variable_manager(state)
        variables_summary_text: Optional[str] = None
        variables_addendum = ""
        if var_manager is not None:
            existing_var_names = var_manager.get_variable_names()
            if existing_var_names:
                variables_summary_text = var_manager.get_variables_summary(variable_names=existing_var_names)
                variables_addendum = (
                    f"\n\n## Available Variables\n\n{variables_summary_text}"
                    f"\n\nYou can use these variables directly by their names."
                )

        # ── Playbook guidance (first call only) ────────────────────────────
        metadata = adapter.get_metadata(state)
        playbook_guidance: Optional[str] = None
        if (
            settings.policy.enabled
            and metadata.get("policy_matched")
            and metadata.get("policy_type") == "playbook"
            and not metadata.get("playbook_guidance_added")
        ):
            playbook_guidance = metadata.get("playbook_guidance")
            if playbook_guidance:
                logger.info("Will inject playbook guidance into last user message (first time only)")

        # ── Process messages: inject PI / playbook / variables ─────────────
        pi = adapter.get_pi(state)
        pi_added = False
        playbook_fired = False
        # ``modified_messages`` is what gets written back to state;
        # ``messages_for_model`` is the role/content view sent to the model.
        modified_messages: list = []

        for i, msg in enumerate(effective_messages):
            is_last = i == len(effective_messages) - 1
            msg_role = getattr(msg, "type", None)
            is_human = isinstance(msg, HumanMessage) or msg_role in ("human", "user")
            is_ai = isinstance(msg, AIMessage) or msg_role in ("ai", "assistant")

            if is_human:
                content = msg.content if hasattr(msg, "content") else (msg.get("content") or "")
                modified = False

                # PI is injected only into a single-message conversation that
                # does not already carry a User Context section.
                if pi and not pi_added and "## User Context" not in content and len(effective_messages) == 1:
                    content = f"{content}\n\n## User Context\n{pi}"
                    pi_added = True
                    modified = True

                if playbook_guidance and is_last:
                    content = f"{content}\n\n## Task Guidance\n{playbook_guidance}"
                    modified = True
                    playbook_fired = True

                if variables_summary_text and is_last:
                    content = content + variables_addendum
                    modified = True

                modified_messages.append(HumanMessage(content=content) if modified else msg)
                messages_for_model.append({"role": "user", "content": content})

            elif is_ai:
                modified_messages.append(msg)
                ai_content = msg.content if hasattr(msg, "content") else (msg.get("content") or "")
                messages_for_model.append({"role": "assistant", "content": ai_content})

            else:
                # Kept in state but not sent to the model.
                modified_messages.append(msg)
                # loguru formats with str.format-style braces; printf-style
                # %d/%s placeholders would be logged literally.
                logger.warning("call_model: skipping message {} with unknown role: {}", i, msg_role)

        logger.info(
            "call_model: {} messages → model ({})",
            len(messages_for_model),
            adapter.sender_name,
        )

        # ── Resolve bound model (falls back to the plain model) ────────────
        bound = await adapter.resolve_bind_tools(state, active_model, configurable) or active_model

        # ── Model invocation ───────────────────────────────────────────────
        invoke_config = adapter.get_invoke_config(configurable)
        response = await adapter.ainvoke_model(bound, messages_for_model, invoke_config)

        # ── Normalise response ─────────────────────────────────────────────
        content, reasoning = adapter.normalize_response(response)

        # ── Extract code ───────────────────────────────────────────────────
        code = extract_code_from_model_response(content, reasoning)

        adapter.on_response_processed(state, code, content)

        # ── Tool-approval interrupt for generated code ─────────────────────
        if code and settings.policy.enabled:
            approval_command = await ToolApprovalHandler.check_and_create_approval_interrupt(
                adapter, state, code, content, config
            )
            if approval_command:
                return approval_command

        # ── Build final message list + step count ──────────────────────────
        final_messages: list = modified_messages + [AIMessage(content=content)]
        new_step_count: int = state.step_count + 1

        # ── Step limit enforcement ─────────────────────────────────────────
        max_steps = adapter.resolve_max_steps(state, configurable.get("cuga_lite_max_steps"))
        limit_cmd = enforce_step_limit(
            adapter,
            state=state,
            messages=final_messages,
            new_step_count=new_step_count,
            limit=max_steps,
        )
        if limit_cmd is not None:
            return limit_cmd

        # ── Metadata update ────────────────────────────────────────────────
        meta_value = adapter.build_metadata_update(state, playbook_fired=playbook_fired)
        meta_update = {adapter.metadata_key: meta_value}

        # ── Route: code → execute node; text → END or auto-continue ────────
        if code:
            return Command(
                goto=adapter.execute_node_name,
                update={
                    adapter.messages_key: final_messages,
                    "script": code,
                    "step_count": new_step_count,
                    **meta_update,
                },
            )

        should_continue = await adapter.classify_auto_continue(state, active_model, content, reasoning)
        if should_continue:
            logger.info("{}: NL response classified as interim — auto-continuing", adapter.sender_name)
            return Command(
                goto="call_model",
                update={
                    adapter.messages_key: final_messages + [HumanMessage(content="continue")],
                    "script": None,
                    "final_answer": "",
                    "execution_complete": False,
                    "step_count": new_step_count,
                    **meta_update,
                },
            )

        return Command(
            goto=END,
            update={
                adapter.messages_key: final_messages,
                "script": None,
                "final_answer": content,
                "execution_complete": True,
                "step_count": new_step_count,
                **meta_update,
            },
        )

    return call_model
def test_model_response_prefers_content_over_reasoning() -> None:
    """Code found in the content wins over code found in the reasoning."""
    content = "```python\nprint('from content')\n```"
    reasoning = "```python\nprint('from reasoning')\n```"
    assert extract_code_from_model_response(content, reasoning) == "print('from content')"


def test_model_response_falls_back_to_reasoning_when_content_has_no_code() -> None:
    """Prose-only content makes the extractor fall back to the reasoning."""
    reasoning = "```python\nprint('from reasoning')\n```"
    extracted = extract_code_from_model_response("just prose, no code here", reasoning)
    assert extracted == "print('from reasoning')"


def test_model_response_empty_content_uses_reasoning() -> None:
    """Empty-string and ``None`` content both defer to the reasoning."""
    reasoning = "```python\nx = compute()\n```"
    assert extract_code_from_model_response("", reasoning) == "x = compute()"
    assert extract_code_from_model_response(None, reasoning) == "x = compute()"


def test_model_response_no_code_anywhere_returns_empty() -> None:
    """No fenced code in either argument yields the empty string."""
    assert extract_code_from_model_response("prose only", None) == ""
    assert extract_code_from_model_response("prose only", "also prose") == ""


def test_fenced_block_without_print_is_still_returned() -> None:
    """Regression guard: a fenced block needs no ``print(`` to be extracted.

    If this starts failing, the extractor has gone back to requiring a
    ``print(`` call inside fenced blocks.
    """
    fenced = "```python\nx = compute_value()\n```"
    assert extract_and_combine_codeblocks(fenced) == "x = compute_value()"


def test_async_function_is_wrapped_not_returned_as_is() -> None:
    """A coroutine function is wrapped in a new coroutine function."""

    async def fetch(x: int) -> int:
        return x * 10

    awaitable_fetch = make_tool_awaitable(fetch)
    assert awaitable_fetch is not fetch
    assert asyncio.iscoroutinefunction(awaitable_fetch)
    assert asyncio.run(awaitable_fetch(4)) == 40


def test_sync_function_becomes_awaitable() -> None:
    """A plain function comes back as a coroutine function with the same result."""

    def add(a: int, b: int) -> int:
        return a + b

    awaitable_add = make_tool_awaitable(add)
    assert asyncio.iscoroutinefunction(awaitable_add)
    assert asyncio.run(awaitable_add(2, 3)) == 5


def test_pydantic_result_is_model_dumped_for_sync_and_async() -> None:
    """Pydantic results are returned as plain dicts for sync and async tools."""

    class Result(BaseModel):
        value: int

    def make_sync() -> BaseModel:
        return Result(value=7)

    async def make_async() -> BaseModel:
        return Result(value=9)

    sync_result = asyncio.run(make_tool_awaitable(make_sync)())
    async_result = asyncio.run(make_tool_awaitable(make_async)())
    assert sync_result == {"value": 7}
    assert async_result == {"value": 9}
+""" + +from __future__ import annotations + +from types import SimpleNamespace + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ( + ExecutionPlan, + ExecutionRouter, + split_execution_note, +) + + +def _settings(**advanced) -> SimpleNamespace: + defaults = dict( + e2b_sandbox=False, + opensandbox_sandbox=True, + enable_shell_tool=False, + sandbox_mode="native", + enable_filesystem_tools=False, + ) + defaults.update(advanced) + return SimpleNamespace(advanced_features=SimpleNamespace(**defaults)) + + +def _settings_with_execution( + *, + python_backend=None, + shell_backend=None, + filesystem_backend=None, + workspace_root=None, + **advanced, +) -> SimpleNamespace: + """Build settings that also carry an ``execution`` sub-section (Phase 6).""" + base = _settings(**advanced) + base.execution = SimpleNamespace( + python_backend=python_backend, + shell_backend=shell_backend, + filesystem_backend=filesystem_backend, + workspace_root=workspace_root, + ) + return base + + +# ─── python_backend (the behavior-preserving core) ────────────────────────── + + +def test_default_settings_resolve_python_local() -> None: + plan = ExecutionRouter.resolve(_settings()) + assert plan.python_backend == "local" + + +def test_e2b_sandbox_flag_resolves_python_e2b() -> None: + plan = ExecutionRouter.resolve(_settings(e2b_sandbox=True)) + assert plan.python_backend == "e2b" + + +def test_explicit_mode_overrides_settings() -> None: + plan = ExecutionRouter.resolve(_settings(e2b_sandbox=True), mode="local") + assert plan.python_backend == "local" + assert any("mode" in f.lower() for f in plan.fallbacks) + + +def test_opensandbox_flag_does_not_change_python_backend() -> None: + """Today opensandbox only affects shell tools, never the Python path.""" + plan = ExecutionRouter.resolve(_settings(opensandbox_sandbox=True)) + assert plan.python_backend == "local" + + +# ─── shell_backend ────────────────────────────────────────────────────────── + + +def 
test_shell_backend_none_when_shell_disabled() -> None: + plan = ExecutionRouter.resolve(_settings(enable_shell_tool=False)) + assert plan.shell_backend == "none" + + +def test_shell_backend_from_sandbox_mode_when_enabled() -> None: + plan = ExecutionRouter.resolve(_settings(enable_shell_tool=True, sandbox_mode="opensandbox")) + assert plan.shell_backend == "opensandbox" + + +def test_shell_backend_native_when_enabled_default_mode() -> None: + plan = ExecutionRouter.resolve(_settings(enable_shell_tool=True, sandbox_mode="native")) + assert plan.shell_backend == "native" + + +# ─── filesystem_backend ───────────────────────────────────────────────────── + + +def test_filesystem_backend_none_when_disabled() -> None: + plan = ExecutionRouter.resolve(_settings(enable_filesystem_tools=False)) + assert plan.filesystem_backend == "none" + + +def test_filesystem_backend_host_by_default_when_enabled() -> None: + plan = ExecutionRouter.resolve(_settings(enable_filesystem_tools=True)) + assert plan.filesystem_backend == "host" + + +def test_filesystem_backend_sandbox_remote_when_shell_is_opensandbox() -> None: + plan = ExecutionRouter.resolve( + _settings( + enable_filesystem_tools=True, + enable_shell_tool=True, + sandbox_mode="opensandbox", + ) + ) + assert plan.filesystem_backend == "sandbox_remote" + + +# ─── plan shape ───────────────────────────────────────────────────────────── + + +def test_plan_carries_local_control_tools_and_is_a_model() -> None: + plan = ExecutionRouter.resolve(_settings()) + assert isinstance(plan, ExecutionPlan) + assert "find_tools" in plan.local_control_tools + assert "load_skill" in plan.local_control_tools + assert plan.requested_backend # non-empty + assert isinstance(plan.fallbacks, list) + + +# ─── Phase 6: execution.* section overrides advanced_features ─────────────── + + +def test_execution_python_backend_e2b_overrides_advanced_features() -> None: + """execution.python_backend='e2b' → e2b even without advanced_features.e2b_sandbox.""" + 
plan = ExecutionRouter.resolve(_settings_with_execution(python_backend="e2b")) + assert plan.python_backend == "e2b" + + +def test_execution_python_backend_local_wins_over_advanced_e2b() -> None: + """execution.python_backend='local' overrides advanced_features.e2b_sandbox=True.""" + plan = ExecutionRouter.resolve(_settings_with_execution(python_backend="local", e2b_sandbox=True)) + assert plan.python_backend == "local" + + +def test_mode_still_wins_over_execution_python_backend() -> None: + """Explicit mode= param takes precedence over both execution and advanced_features.""" + plan = ExecutionRouter.resolve(_settings_with_execution(python_backend="e2b"), mode="local") + assert plan.python_backend == "local" + + +def test_execution_shell_backend_overrides_advanced_features() -> None: + """execution.shell_backend='native' enables shell without enable_shell_tool flag.""" + plan = ExecutionRouter.resolve(_settings_with_execution(shell_backend="native")) + assert plan.shell_backend == "native" + + +def test_execution_shell_backend_none_overrides_enabled_flag() -> None: + """execution.shell_backend='none' disables shell even if enable_shell_tool=True.""" + plan = ExecutionRouter.resolve(_settings_with_execution(shell_backend="none", enable_shell_tool=True)) + assert plan.shell_backend == "none" + + +def test_execution_filesystem_backend_overrides_advanced_features() -> None: + """execution.filesystem_backend='host' enables FS without enable_filesystem_tools.""" + plan = ExecutionRouter.resolve(_settings_with_execution(filesystem_backend="host")) + assert plan.filesystem_backend == "host" + + +def test_execution_workspace_root_propagates_to_plan() -> None: + """execution.workspace_root is surfaced on the plan.""" + plan = ExecutionRouter.resolve(_settings_with_execution(workspace_root="/custom")) + assert plan.workspace_root == "/custom" + + +def test_advanced_features_fallback_unchanged_without_execution_section() -> None: + """When no execution section, behavior is 
byte-identical to original.""" + plan_legacy = ExecutionRouter.resolve( + _settings(enable_shell_tool=True, sandbox_mode="opensandbox", enable_filesystem_tools=True) + ) + plan_no_exec = ExecutionRouter.resolve( + _settings_with_execution( + enable_shell_tool=True, sandbox_mode="opensandbox", enable_filesystem_tools=True + ) + ) + assert plan_legacy.python_backend == plan_no_exec.python_backend + assert plan_legacy.shell_backend == plan_no_exec.shell_backend + assert plan_legacy.filesystem_backend == plan_no_exec.filesystem_backend + + +# ─── split_execution_note (Phase 10: prompt/log visibility) ───────────────── + + +def _plan(python_backend, shell_backend, filesystem_backend) -> ExecutionPlan: + return ExecutionPlan( + requested_backend=python_backend, + python_backend=python_backend, + shell_backend=shell_backend, + filesystem_backend=filesystem_backend, + ) + + +def test_split_execution_note_empty_when_fully_local() -> None: + plan = _plan("local", "none", "none") + assert split_execution_note(plan) == "" + + +def test_split_execution_note_empty_when_python_is_e2b() -> None: + """When Python itself runs remotely, there is no split — note is silent.""" + plan = _plan("e2b", "opensandbox", "sandbox_remote") + assert split_execution_note(plan) == "" + + +def test_split_execution_note_nonempty_when_local_python_remote_shell() -> None: + plan = _plan("local", "opensandbox", "none") + note = split_execution_note(plan) + assert note != "" + assert "run_command" in note.lower() or "shell" in note.lower() + + +def test_split_execution_note_nonempty_when_local_python_remote_fs() -> None: + plan = _plan("local", "none", "sandbox_remote") + note = split_execution_note(plan) + assert note != "" + + +def test_split_execution_note_nonempty_when_both_remote() -> None: + plan = _plan("local", "native", "sandbox_remote") + note = split_execution_note(plan) + assert note != "" + assert "local" in note.lower() or "sandbox" in note.lower() + + +# ─── Phase 11: legacy deprecation 
warnings ────────────────────────────────── +# When execution.* settings override advanced_features.* flags that disagree, +# ExecutionRouter should record a deprecation notice in plan.fallbacks so +# operators can migrate away from the old flags. + + +def test_legacy_e2b_sandbox_deprecated_when_execution_python_backend_differs() -> None: + """execution.python_backend='local' + advanced_features.e2b_sandbox=True → deprecation. + + The new setting wins (behavior-preserving), but the operator should be + told that e2b_sandbox is now superseded and should be removed. + """ + plan = ExecutionRouter.resolve(_settings_with_execution(python_backend="local", e2b_sandbox=True)) + assert plan.python_backend == "local" + assert any("e2b_sandbox" in f.lower() or "deprecated" in f.lower() for f in plan.fallbacks) + + +def test_no_deprecation_when_execution_python_backend_agrees_with_e2b_flag() -> None: + """No deprecation when execution.python_backend='e2b' and e2b_sandbox=True (they agree).""" + plan = ExecutionRouter.resolve(_settings_with_execution(python_backend="e2b", e2b_sandbox=True)) + assert plan.python_backend == "e2b" + # No e2b_sandbox deprecation when the flags agree + assert not any("e2b_sandbox" in f.lower() for f in plan.fallbacks) + + +def test_legacy_shell_flag_deprecated_when_execution_shell_backend_disagrees() -> None: + """execution.shell_backend='none' + advanced_features.enable_shell_tool=True → deprecation.""" + plan = ExecutionRouter.resolve(_settings_with_execution(shell_backend="none", enable_shell_tool=True)) + assert plan.shell_backend == "none" + assert any("enable_shell_tool" in f.lower() or "deprecated" in f.lower() for f in plan.fallbacks) + + +def test_no_deprecation_when_no_execution_section_and_legacy_flags_used() -> None: + """Pure advanced_features path (no execution.*) never emits deprecation warnings.""" + plan = ExecutionRouter.resolve( + _settings(e2b_sandbox=True, enable_shell_tool=True, sandbox_mode="opensandbox") + ) + assert 
plan.python_backend == "e2b" + # No deprecation without an execution.* override + assert not any("deprecated" in f.lower() for f in plan.fallbacks) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_adapter_hooks.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_adapter_hooks.py new file mode 100644 index 00000000..ce6702fe --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_adapter_hooks.py @@ -0,0 +1,236 @@ +"""Phase 1 + Phase 2 — CoreGraphAdapter optional call_model hook extensions. + +Pins the optional hooks added to CoreGraphAdapter so that: +1. A minimal concrete subclass that only implements the abstract methods + gets no-op defaults for all hooks. +2. A subclass that overrides the hooks returns its custom values. +3. The hooks have the correct signatures. + +Phase 2 additions: prepare_system_content, normalize_response, get_invoke_config, +on_response_processed, build_metadata_update, get_variables_storage, get_tracker, +and the async classify_auto_continue (model + reasoning args). +""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any, List, Optional + +import pytest +from langchain_core.messages import BaseMessage, HumanMessage + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter + + +# ── Minimal concrete adapter (only satisfies abstract methods) ───────────── + + +class _MinimalAdapter(CoreGraphAdapter): + messages_key = "chat_messages" + + def get_messages(self, state: Any) -> List[BaseMessage]: + return [] + + def resolve_max_steps(self, state: Any, override: Optional[int]) -> int: + return override or 50 + + +# ── 1. 
Default hook values ───────────────────────────────────────────────── + + +def test_default_get_few_shot_messages_returns_empty_list(): + adapter = _MinimalAdapter() + state = SimpleNamespace() + result = adapter.get_few_shot_messages(state) + assert result == [] + + +def test_default_get_pi_returns_none(): + adapter = _MinimalAdapter() + state = SimpleNamespace() + assert adapter.get_pi(state) is None + + +@pytest.mark.asyncio +async def test_default_classify_auto_continue_returns_false(): + adapter = _MinimalAdapter() + state = SimpleNamespace() + assert ( + await adapter.classify_auto_continue(state, model=None, content="some content", reasoning=None) + is False + ) + + +@pytest.mark.asyncio +async def test_default_resolve_bind_tools_returns_none(): + adapter = _MinimalAdapter() + state = SimpleNamespace() + assert await adapter.resolve_bind_tools(state, active_model=None, configurable={}) is None + + +def test_default_prepare_system_content_returns_base_prompt(): + adapter = _MinimalAdapter() + state = SimpleNamespace() + assert adapter.prepare_system_content(state, {}, "base prompt") == "base prompt" + + +def test_default_normalize_response_extracts_content_and_reasoning(): + adapter = _MinimalAdapter() + response = SimpleNamespace( + content="hello", + additional_kwargs={"reasoning_content": "thought"}, + ) + content, reasoning = adapter.normalize_response(response) + assert content == "hello" + assert reasoning == "thought" + + +def test_default_normalize_response_none_reasoning(): + adapter = _MinimalAdapter() + response = SimpleNamespace(content="hi", additional_kwargs={}) + content, reasoning = adapter.normalize_response(response) + assert content == "hi" + assert reasoning is None + + +def test_default_get_invoke_config_returns_empty_dict(): + adapter = _MinimalAdapter() + assert adapter.get_invoke_config({}) == {} + + +def test_default_on_response_processed_is_noop(): + adapter = _MinimalAdapter() + state = SimpleNamespace() + # Must not raise + 
adapter.on_response_processed(state, code=None, content="hi") + adapter.on_response_processed(state, code="print(1)", content="```python\nprint(1)\n```") + + +def test_default_build_metadata_update_returns_existing_meta_when_no_playbook(): + adapter = _MinimalAdapter() + state = SimpleNamespace(cuga_lite_metadata={"some_key": True}) + result = adapter.build_metadata_update(state, playbook_fired=False) + assert result == {"some_key": True} + + +def test_default_build_metadata_update_adds_playbook_flag_when_fired(): + adapter = _MinimalAdapter() + state = SimpleNamespace(cuga_lite_metadata={"some_key": True}) + result = adapter.build_metadata_update(state, playbook_fired=True) + assert result["playbook_guidance_added"] is True + assert result["some_key"] is True + + +def test_default_get_variables_storage_returns_state_attr(): + adapter = _MinimalAdapter() + storage = {"x": {"value": 1}} + state = SimpleNamespace(variables_storage=storage) + assert adapter.get_variables_storage(state) is storage + + +def test_default_get_tracker_returns_none(): + adapter = _MinimalAdapter() + assert adapter.get_tracker() is None + + +@pytest.mark.asyncio +async def test_default_ainvoke_model_delegates_to_bound(): + from unittest.mock import AsyncMock, MagicMock + + adapter = _MinimalAdapter() + sentinel = object() + mock_bound = MagicMock() + mock_bound.ainvoke = AsyncMock(return_value=sentinel) + result = await adapter.ainvoke_model(mock_bound, ["msg"], {"callbacks": []}) + assert result is sentinel + mock_bound.ainvoke.assert_awaited_once_with(["msg"], config={"callbacks": []}) + + +# ── 2. 
Overriding hooks ──────────────────────────────────────────────────── + + +class _FullAdapter(CoreGraphAdapter): + messages_key = "chat_messages" + + def get_messages(self, state: Any) -> List[BaseMessage]: + return state.chat_messages or [] + + def resolve_max_steps(self, state: Any, override: Optional[int]) -> int: + return override or 10 + + def get_few_shot_messages(self, state: Any) -> List[BaseMessage]: + return state.few_shot or [] + + def get_pi(self, state: Any) -> Optional[str]: + return getattr(state, "pi", None) + + async def classify_auto_continue( + self, state: Any, model: Any, content: str, reasoning: Optional[str] + ) -> bool: + return "CONTINUE" in content + + async def resolve_bind_tools(self, state: Any, active_model: Any, configurable: dict) -> Any: + return "bound_model_sentinel" + + +def test_overridden_get_few_shot_messages(): + adapter = _FullAdapter() + msg = HumanMessage(content="example") + state = SimpleNamespace(few_shot=[msg]) + assert adapter.get_few_shot_messages(state) == [msg] + + +def test_overridden_get_pi(): + adapter = _FullAdapter() + state = SimpleNamespace(pi="You are a helpful assistant.") + assert adapter.get_pi(state) == "You are a helpful assistant." + + +@pytest.mark.asyncio +async def test_overridden_classify_auto_continue_true(): + adapter = _FullAdapter() + state = SimpleNamespace() + assert ( + await adapter.classify_auto_continue(state, None, "Please CONTINUE with the next step.", None) is True + ) + + +@pytest.mark.asyncio +async def test_overridden_classify_auto_continue_false(): + adapter = _FullAdapter() + state = SimpleNamespace() + assert await adapter.classify_auto_continue(state, None, "All done.", None) is False + + +@pytest.mark.asyncio +async def test_overridden_resolve_bind_tools(): + adapter = _FullAdapter() + state = SimpleNamespace() + result = await adapter.resolve_bind_tools(state, active_model=None, configurable={}) + assert result == "bound_model_sentinel" + + +# ── 3. 
Hook signatures are stable under keyword call ────────────────────── + + +@pytest.mark.asyncio +async def test_all_hooks_callable_with_keyword_args(): + adapter = _MinimalAdapter() + state = SimpleNamespace(variables_storage={}, cuga_lite_metadata={}) + response = SimpleNamespace(content="hi", additional_kwargs={}) + # Should not raise + adapter.get_few_shot_messages(state) + adapter.get_pi(state) + await adapter.classify_auto_continue(state, model=None, content="hello", reasoning=None) + await adapter.resolve_bind_tools(state, active_model=None, configurable={}) + adapter.prepare_system_content(state, {}, "prompt") + adapter.normalize_response(response) + adapter.get_invoke_config({}) + adapter.on_response_processed(state, code=None, content="hi") + adapter.build_metadata_update(state, playbook_fired=False) + adapter.get_variables_storage(state) + adapter.get_tracker() + from unittest.mock import AsyncMock + + mock_bound = AsyncMock() + await adapter.ainvoke_model(mock_bound, [], {}) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_nodes.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_nodes.py new file mode 100644 index 00000000..12f59319 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_nodes.py @@ -0,0 +1,336 @@ +"""Tests for the shared loop helpers (slice 4a). + +These pin behavior of the de-duplicated ``append_chat_messages_with_step_limit`` +and ``create_error_command`` extracted from Lite + Supervisor. The only +graph-specific bits are abstracted behind ``CoreGraphAdapter``: + +- ``messages_key`` — state key the error Command writes messages to; +- ``get_messages`` — base message list to append onto; +- ``resolve_max_steps`` — how the step limit is resolved. + +Parity guards reproduce the exact Lite and Supervisor adapter behavior so +the extraction cannot silently drift. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace + +from langchain_core.messages import AIMessage, HumanMessage +from langgraph.graph import END + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( + CoreGraphAdapter, + append_chat_messages_with_step_limit, + create_error_command, + enforce_step_limit, + execution_output_text, + inject_playbook_guidance, +) + + +# ─── execution_output_text (Candidate 3 micro-slice) ──────────────────────── + + +def test_execution_output_text_format(): + assert execution_output_text("hello\nworld") == "Execution output:\nhello\nworld" + assert execution_output_text("") == "Execution output:\n" + + +# ─── CoreGraphAdapter.get_variable_manager default + override ──────────────── + + +def test_get_variable_manager_default_uses_state_variables_manager(): + class A(CoreGraphAdapter): + messages_key = "m" + + def get_messages(self, state): + return [] + + def resolve_max_steps(self, state, override): + return 1 + + vm = object() + assert A().get_variable_manager(SimpleNamespace(variables_manager=vm)) is vm + # absent attribute -> None (default is getattr-with-None) + assert A().get_variable_manager(SimpleNamespace()) is None + + +def test_get_variable_manager_is_overridable(): + sup_vm = object() + + class SupLike(CoreGraphAdapter): + messages_key = "supervisor_chat_messages" + + def get_messages(self, state): + return [] + + def resolve_max_steps(self, state, override): + return 1 + + def get_variable_manager(self, state): + return state.supervisor_variables_manager + + assert SupLike().get_variable_manager(SimpleNamespace(supervisor_variables_manager=sup_vm)) is sup_vm + + +class _FakeAdapter(CoreGraphAdapter): + messages_key = "m" + + def __init__(self, limit: int): + self._limit = limit + + def get_messages(self, state): + return list(getattr(state, "m", None) or []) + + def resolve_max_steps(self, state, override): + return override if override is not None else self._limit + + +def 
_state(messages, step_count): + return SimpleNamespace(m=messages, step_count=step_count) + + +# ─── append_chat_messages_with_step_limit ─────────────────────────────────── + + +def test_under_limit_appends_and_returns_no_error(): + base = [HumanMessage(content="hi")] + new = [AIMessage(content="ok")] + out, err = append_chat_messages_with_step_limit(_FakeAdapter(limit=5), _state(base, step_count=1), new) + assert err is None + assert out == base + new + + +def test_over_limit_appends_error_message_and_returns_it(): + out, err = append_chat_messages_with_step_limit( + _FakeAdapter(limit=2), _state([], step_count=2), [AIMessage(content="x")] + ) + assert isinstance(err, AIMessage) + assert "Maximum step limit (2) reached" in err.content + assert out[-1] is err + + +def test_limit_boundary_is_strictly_greater_than(): + # step_count=1, limit=2 -> new_step_count=2, not > 2 -> OK + _, err_ok = append_chat_messages_with_step_limit(_FakeAdapter(limit=2), _state([], step_count=1), []) + assert err_ok is None + # step_count=2, limit=2 -> new_step_count=3 > 2 -> error + _, err_bad = append_chat_messages_with_step_limit(_FakeAdapter(limit=2), _state([], step_count=2), []) + assert err_bad is not None + + +def test_max_steps_override_takes_precedence(): + _, err = append_chat_messages_with_step_limit( + _FakeAdapter(limit=999), _state([], step_count=3), [], max_steps=2 + ) + assert err is not None + assert "Maximum step limit (2) reached" in err.content + + +# ─── create_error_command ─────────────────────────────────────────────────── + + +def test_error_command_routes_to_end_with_expected_update(): + msgs = [AIMessage(content="boom")] + err = AIMessage(content="boom") + cmd = create_error_command(_FakeAdapter(limit=1), msgs, err, step_count=4) + assert cmd.goto == END + u = cmd.update + assert u["m"] == msgs + assert u["script"] is None + assert u["final_answer"] == "boom" + assert u["execution_complete"] is True + assert u["error"] == "boom" + assert u["step_count"] == 5 + 
+ +def test_error_command_additional_updates_merge_and_override(): + err = AIMessage(content="e") + cmd = create_error_command( + _FakeAdapter(limit=1), + [], + err, + step_count=0, + additional_updates={"final_answer": "overridden", "extra": 1}, + ) + assert cmd.update["final_answer"] == "overridden" + assert cmd.update["extra"] == 1 + + +# ─── enforce_step_limit (Candidate 2: the in-call_model branch check) ──────── + + +def test_enforce_step_limit_returns_none_under_limit(): + cmd = enforce_step_limit( + _FakeAdapter(limit=99), + state=_state([], step_count=3), + messages=[HumanMessage(content="u")], + new_step_count=4, + limit=10, + ) + assert cmd is None + + +def test_enforce_step_limit_returns_end_command_over_limit(): + msgs = [HumanMessage(content="u"), AIMessage(content="code")] + cmd = enforce_step_limit( + _FakeAdapter(limit=99), + state=_state([], step_count=2), + messages=msgs, + new_step_count=3, + limit=2, + ) + assert cmd is not None + assert cmd.goto == END + # uses the PASSED messages (not adapter.get_messages) + appended error + written = cmd.update["m"] + assert written[:2] == msgs + assert isinstance(written[-1], AIMessage) + assert "Maximum step limit (2) reached" in written[-1].content + assert cmd.update["final_answer"] == written[-1].content + assert cmd.update["step_count"] == 3 # state.step_count (2) + 1 + + +def test_enforce_step_limit_boundary_strictly_greater(): + common = dict(state=_state([], step_count=0), messages=[]) + assert enforce_step_limit(_FakeAdapter(1), new_step_count=2, limit=2, **common) is None + assert enforce_step_limit(_FakeAdapter(1), new_step_count=3, limit=2, **common) is not None + + +def test_enforce_step_limit_respects_adapter_messages_key(): + class SupLike(CoreGraphAdapter): + messages_key = "supervisor_chat_messages" + + def get_messages(self, state): + return [] + + def resolve_max_steps(self, state, override): + return 50 + + cmd = enforce_step_limit( + SupLike(), + state=_state([], step_count=5), + 
messages=[], + new_step_count=99, + limit=1, + ) + assert "supervisor_chat_messages" in cmd.update + assert cmd.update["step_count"] == 6 + + +# ─── parity with the real Lite / Supervisor adapter behavior ──────────────── + + +def test_lite_like_adapter_matches_legacy_behavior(): + """Lite: base = state.chat_messages, key = 'chat_messages', + max_steps = override or settings default (here injected = 50).""" + + class LiteLike(CoreGraphAdapter): + messages_key = "chat_messages" + + def get_messages(self, state): + return state.chat_messages + + def resolve_max_steps(self, state, override): + return override if override is not None else 50 + + st = SimpleNamespace(chat_messages=[HumanMessage(content="u")], step_count=0) + out, err = append_chat_messages_with_step_limit(LiteLike(), st, [AIMessage(content="a")]) + assert err is None + assert out == [HumanMessage(content="u"), AIMessage(content="a")] + cmd = create_error_command(LiteLike(), out, AIMessage(content="z"), 0) + assert "chat_messages" in cmd.update + + +def test_supervisor_like_adapter_handles_none_messages(): + """Supervisor: base = state.supervisor_chat_messages or [], + key = 'supervisor_chat_messages', max_steps = state value or 50.""" + + class SupLike(CoreGraphAdapter): + messages_key = "supervisor_chat_messages" + + def get_messages(self, state): + return state.supervisor_chat_messages or [] + + def resolve_max_steps(self, state, override): + v = state.cuga_lite_max_steps + return v if v is not None else 50 + + st = SimpleNamespace(supervisor_chat_messages=None, step_count=0, cuga_lite_max_steps=None) + out, err = append_chat_messages_with_step_limit(SupLike(), st, [AIMessage(content="a")]) + assert err is None + assert out == [AIMessage(content="a")] + cmd = create_error_command(SupLike(), out, AIMessage(content="z"), 7) + assert "supervisor_chat_messages" in cmd.update + assert cmd.update["step_count"] == 8 + + +# ─── inject_playbook_guidance ──────────────────────────────────────────────── + + +def 
test_inject_playbook_guidance_no_op_when_metadata_is_none(): + msgs = [HumanMessage(content="hi")] + assert inject_playbook_guidance(msgs, None) is msgs + + +def test_inject_playbook_guidance_no_op_when_empty_metadata(): + msgs = [HumanMessage(content="hi")] + assert inject_playbook_guidance(msgs, {}) is msgs + + +def test_inject_playbook_guidance_no_op_when_not_policy_matched(): + msgs = [HumanMessage(content="hi")] + meta = {"policy_matched": False, "policy_type": "playbook", "playbook_guidance": "g"} + assert inject_playbook_guidance(msgs, meta) == msgs + + +def test_inject_playbook_guidance_no_op_when_wrong_policy_type(): + msgs = [HumanMessage(content="hi")] + meta = {"policy_matched": True, "policy_type": "tool_guide", "playbook_guidance": "g"} + assert inject_playbook_guidance(msgs, meta) == msgs + + +def test_inject_playbook_guidance_no_op_when_already_added(): + msgs = [HumanMessage(content="hi")] + meta = { + "policy_matched": True, + "policy_type": "playbook", + "playbook_guidance": "g", + "playbook_guidance_added": True, + } + assert inject_playbook_guidance(msgs, meta) == msgs + + +def test_inject_playbook_guidance_no_op_when_guidance_empty(): + msgs = [HumanMessage(content="hi")] + meta = {"policy_matched": True, "policy_type": "playbook", "playbook_guidance": ""} + assert inject_playbook_guidance(msgs, meta) == msgs + + +def test_inject_playbook_guidance_injects_into_last_human_message(): + msgs = [ + HumanMessage(content="first"), + AIMessage(content="resp"), + HumanMessage(content="second"), + ] + meta = {"policy_matched": True, "policy_type": "playbook", "playbook_guidance": "step 1\nstep 2"} + result = inject_playbook_guidance(msgs, meta) + assert result[0].content == "first" + assert result[1].content == "resp" + assert result[2].content == "second\n\n## Task Guidance\nstep 1\nstep 2" + + +def test_inject_playbook_guidance_does_not_mutate_input(): + msgs = [HumanMessage(content="hi")] + meta = {"policy_matched": True, "policy_type": "playbook", 
"playbook_guidance": "g"} + result = inject_playbook_guidance(msgs, meta) + assert msgs[0].content == "hi" + assert result is not msgs + + +def test_inject_playbook_guidance_no_op_when_no_human_message(): + msgs = [AIMessage(content="response only")] + meta = {"policy_matched": True, "policy_type": "playbook", "playbook_guidance": "g"} + result = inject_playbook_guidance(msgs, meta) + assert result == msgs diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_policy_enactment_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_policy_enactment_adapter.py new file mode 100644 index 00000000..7a42fa91 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_policy_enactment_adapter.py @@ -0,0 +1,91 @@ +"""Behavior-pinning tests for the adapter-ized PolicyEnactment block path. + +``_enact_block_intent`` was hard-coupled to ``CugaLiteState`` (read +``state.chat_messages``; write update keys ``chat_messages`` / +``cuga_lite_metadata``). It now takes an optional ``CoreGraphAdapter``: + +- ``adapter=None`` → exact legacy Lite literals (back-compat for the + output-formatter caller and any other path); +- Lite adapter → identical (its seam values equal those literals); +- Supervisor adapter → ``supervisor_chat_messages`` / ``supervisor_metadata``. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace + +from langchain_core.messages import AIMessage, HumanMessage +from langgraph.graph import END + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.policy.enactment import PolicyEnactment + + +class LiteLike(CoreGraphAdapter): + messages_key = "chat_messages" + + def get_messages(self, state): + return state.chat_messages + + def resolve_max_steps(self, state, override): + return 999 + + +class SupLike(CoreGraphAdapter): + messages_key = "supervisor_chat_messages" + metadata_key = "supervisor_metadata" + + def get_messages(self, state): + return state.supervisor_chat_messages or [] + + def resolve_max_steps(self, state, override): + return 999 + + +def _match(): + return SimpleNamespace( + action=SimpleNamespace(content="blocked!"), + policy=SimpleNamespace(id="p1", name="Guard"), + reasoning="nope", + confidence=0.9, + ) + + +def _lite_state(): + return SimpleNamespace(chat_messages=[HumanMessage(content="hi")]) + + +def test_block_intent_legacy_default_no_adapter(): + cmd, meta = PolicyEnactment._enact_block_intent(_lite_state(), _match(), None) + assert meta is None + assert cmd.goto == END + u = cmd.update + assert isinstance(u["chat_messages"][-1], AIMessage) + assert u["chat_messages"][-1].content == "blocked!" + assert u["final_answer"] == "blocked!" 
+ assert u["execution_complete"] is True + assert u["step_count"] == 0 + assert u["cuga_lite_metadata"]["policy_blocked"] is True + assert u["cuga_lite_metadata"]["policy_id"] == "p1" + assert u["cuga_lite_metadata"]["policy_type"] == "intent_guard" + + +def test_block_intent_lite_adapter_is_identical_to_legacy(): + legacy_cmd, _ = PolicyEnactment._enact_block_intent(_lite_state(), _match(), None) + lite_cmd, _ = PolicyEnactment._enact_block_intent(_lite_state(), _match(), LiteLike()) + assert lite_cmd.goto == legacy_cmd.goto + assert set(lite_cmd.update) == set(legacy_cmd.update) + assert "cuga_lite_metadata" in lite_cmd.update + assert "chat_messages" in lite_cmd.update + + +def test_block_intent_supervisor_adapter_uses_supervisor_keys(): + st = SimpleNamespace(supervisor_chat_messages=[HumanMessage(content="u")]) + cmd, _ = PolicyEnactment._enact_block_intent(st, _match(), SupLike()) + assert cmd.goto == END + assert "supervisor_chat_messages" in cmd.update + assert "supervisor_metadata" in cmd.update + assert "chat_messages" not in cmd.update + assert "cuga_lite_metadata" not in cmd.update + assert cmd.update["supervisor_chat_messages"][-1].content == "blocked!" + assert cmd.update["supervisor_metadata"]["policy_blocked"] is True diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_runtime_tools.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_runtime_tools.py new file mode 100644 index 00000000..6e07e4e1 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_runtime_tools.py @@ -0,0 +1,288 @@ +"""Tests for the shared runtime tool-injection orchestrator. + +Two concerns, kept separate: + +1. ``resolve_runtime_backends`` must reproduce *exactly* the legacy gating + currently inlined in ``cuga_lite_graph`` (the + ``_sandbox_mode/_shell_tool_on/_fs_tool_on/_opensandbox_on/_use_sandbox`` + block). These are the behavior-preservation guards. +2. 
``build_runtime_tools`` only *orchestrates* the existing + ``create_filesystem_tools`` + shell ``create_sandbox_tools`` packages — + it must not re-implement any tool. We assert the wiring (which backend, + which executor, prompt vs execution namespace) with fakes. +""" + +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.runtime_tools import ( + RuntimeBackends, + ToolBundle, + build_runtime_tools, + prompt_tool_dicts, + resolve_runtime_backends, +) + + +# ─── prompt_tool_dicts (Phase 5: expose runtime tools in Supervisor prompt) ── + + +class _PT: + def __init__(self, name, description, args): + self.name = name + self.description = description + self.args = args + + +def test_prompt_tool_dicts_shape_for_supervisor_template(): + out = prompt_tool_dicts([_PT("read_file", "Read a file.", {"path": {}, "start": {}})]) + assert out == [ + { + "name": "read_file", + "description": "Read a file.", + "params_str": "path, start", + "params_doc": "- path\n- start", + "response_doc": "", + } + ] + + +def test_prompt_tool_dicts_no_args_and_empty_list(): + assert prompt_tool_dicts([]) == [] + out = prompt_tool_dicts([_PT("run_command", "", {})]) + assert out[0]["params_str"] == "" + assert out[0]["params_doc"] == "No parameters required" + assert out[0]["description"] == "" + + +def test_prompt_tool_dicts_survives_bad_args(): + class Bad: + name = "x" + description = "d" + + @property + def args(self): + raise RuntimeError("schema build failed") + + out = prompt_tool_dicts([Bad()]) + assert out[0]["params_str"] == "" + assert out[0]["name"] == "x" + + +def _settings(**adv) -> SimpleNamespace: + defaults = dict( + e2b_sandbox=False, + opensandbox_sandbox=True, + enable_shell_tool=False, + sandbox_mode="native", + enable_filesystem_tools=False, + ) + defaults.update(adv) + return SimpleNamespace(advanced_features=SimpleNamespace(**defaults)) + + +# ─── resolve_runtime_backends: exact 
legacy-gating parity ─────────────────── + + +def test_defaults_inject_nothing(): + b = resolve_runtime_backends(_settings(), {}) + assert b == RuntimeBackends(filesystem="none", shell="none") + + +def test_fs_only_uses_host_backend(): + b = resolve_runtime_backends(_settings(enable_filesystem_tools=True), {}) + assert b.filesystem == "host" + assert b.shell == "none" + + +def test_configurable_override_enables_fs(): + b = resolve_runtime_backends(_settings(enable_filesystem_tools=False), {"enable_filesystem_tools": True}) + assert b.filesystem == "host" + + +def test_shell_native_when_enabled(): + b = resolve_runtime_backends(_settings(enable_shell_tool=True, sandbox_mode="native"), {}) + assert b.shell == "native" + + +def test_shell_local_when_enabled(): + b = resolve_runtime_backends(_settings(enable_shell_tool=True, sandbox_mode="local"), {}) + assert b.shell == "local" + + +def test_shell_opensandbox_requires_opensandbox_flag(): + on = resolve_runtime_backends( + _settings(enable_shell_tool=True, sandbox_mode="opensandbox", opensandbox_sandbox=True), {} + ) + off = resolve_runtime_backends( + _settings(enable_shell_tool=True, sandbox_mode="opensandbox", opensandbox_sandbox=False), {} + ) + assert on.shell == "opensandbox" + assert off.shell == "none" # _use_sandbox is False without the flag + + +def test_fs_uses_sandbox_remote_only_with_opensandbox_shell(): + remote = resolve_runtime_backends( + _settings( + enable_filesystem_tools=True, + enable_shell_tool=True, + sandbox_mode="opensandbox", + opensandbox_sandbox=True, + ), + {}, + ) + host = resolve_runtime_backends( + _settings(enable_filesystem_tools=True, enable_shell_tool=True, sandbox_mode="native"), + {}, + ) + assert remote.filesystem == "sandbox_remote" + assert host.filesystem == "host" + + +# ─── build_runtime_tools: orchestration only ──────────────────────────────── + + +class _FakeTool: + def __init__(self, name, coroutine=None, func=None): + self.name = name + self.coroutine = coroutine + 
self.func = func + + +@pytest.fixture +def patch_packages(monkeypatch): + created = {} + + def fake_create_fs(thread_id=None, *, backend=None): + created["fs_thread_id"] = thread_id + created["fs_backend"] = backend + + async def _rf(): + return "x" + + return [_FakeTool("read_file", coroutine=_rf), _FakeTool("write_file", func=lambda: None)] + + class FakeRemoteBackend: + def __init__(self, executor, thread_id=None): + created["remote_executor"] = executor + created["remote_thread_id"] = thread_id + + import cuga.backend.cuga_graph.nodes.cuga_lite.executors.filesystem as fs_pkg + + monkeypatch.setattr(fs_pkg, "create_filesystem_tools", fake_create_fs) + monkeypatch.setattr(fs_pkg, "RemoteSandboxBackend", FakeRemoteBackend) + + class FakeShellExecutor: + def __init__(self, label): + self.label = label + + def create_sandbox_tools(self, thread_id=None): + created["shell_thread_id"] = thread_id + created["shell_label"] = self.label + + async def _rc(): + return "ran" + + return [_FakeTool("run_command", coroutine=_rc)] + + from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor + + monkeypatch.setattr( + CodeExecutor, "_get_native_executor", classmethod(lambda cls: FakeShellExecutor("native")) + ) + monkeypatch.setattr( + CodeExecutor, + "_get_local_sandbox_executor", + classmethod(lambda cls: FakeShellExecutor("local")), + ) + monkeypatch.setattr( + CodeExecutor, + "_get_opensandbox_executor", + classmethod(lambda cls: FakeShellExecutor("opensandbox")), + ) + return created + + +def test_none_none_produces_empty_bundle(patch_packages): + bundle = build_runtime_tools(thread_id="t1", backends=RuntimeBackends("none", "none")) + assert isinstance(bundle, ToolBundle) + assert bundle.prompt_tools == [] + assert bundle.execution_callables == {} + assert bundle.app_definitions == [] + + +def test_host_fs_orchestrates_create_filesystem_tools(patch_packages): + bundle = build_runtime_tools(thread_id="t1", backends=RuntimeBackends("host", "none")) + assert 
patch_packages["fs_backend"] is None # host default + assert {t.name for t in bundle.prompt_tools} == {"read_file", "write_file"} + assert set(bundle.execution_callables) == {"read_file", "write_file"} + assert [a.name for a in bundle.app_definitions] == ["filesystem"] + + +def test_sandbox_remote_fs_builds_remote_backend(patch_packages): + build_runtime_tools(thread_id="t9", backends=RuntimeBackends("sandbox_remote", "opensandbox")) + # RemoteSandboxBackend wired with the opensandbox executor + thread_id + assert patch_packages["fs_backend"] is not None + assert patch_packages["remote_thread_id"] == "t9" + + +def test_shell_native_uses_native_executor_only(patch_packages): + bundle = build_runtime_tools(thread_id="t1", backends=RuntimeBackends("none", "native")) + assert patch_packages["shell_label"] == "native" + assert set(bundle.execution_callables) == {"run_command"} + assert bundle.app_definitions == [] # shell-only: no filesystem app def + + +def test_callable_is_coroutine_or_func_and_skips_empty(patch_packages): + bundle = build_runtime_tools(thread_id="t1", backends=RuntimeBackends("host", "none")) + # write_file had only .func, read_file had .coroutine — both captured + assert callable(bundle.execution_callables["read_file"]) + assert callable(bundle.execution_callables["write_file"]) + + +# ─── Skills parity: prompt_tool_dicts on real StructuredTool (Supervisor) ──── + + +def test_skill_tool_is_prompt_dict_compatible(): + """Real StructuredTool from create_skill_tools must survive prompt_tool_dicts + and produce a load_skill dict with the correct keys for Supervisor's template. 
+ + This is the integration seam the skills-parity wiring uses: + create_skill_tools(registry) → prompt_tool_dicts(tools) → agent_tools_for_prompt + """ + from cuga.backend.skills.registry import SkillEntry, SkillRegistry + from cuga.backend.skills.tools import create_skill_tools + + entry = SkillEntry(name="demo", description="A demo skill.", body="# demo", source="test") + registry = SkillRegistry([entry]) + skill_tools = create_skill_tools(registry) + + dicts = prompt_tool_dicts(skill_tools) + + assert len(dicts) == 1 + d = dicts[0] + assert d["name"] == "load_skill" + assert "name" in d["params_str"] + assert d["description"] != "" + + +@pytest.mark.asyncio +async def test_skill_tool_func_is_awaitable_via_make_tool_awaitable(): + """Skill tools use .func (sync) — make_tool_awaitable must wrap it so + Supervisor's execute_agent_tool can ``await load_skill(...)`` in code blocks.""" + from cuga.backend.skills.registry import SkillEntry, SkillRegistry + from cuga.backend.skills.tools import create_skill_tools + from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import make_tool_awaitable + + entry = SkillEntry(name="my_skill", description="Test.", body="instructions", source="t") + registry = SkillRegistry([entry]) + skill_tools = create_skill_tools(registry) + + load_skill_tool = skill_tools[0] + wrapped = make_tool_awaitable(load_skill_tool.func) + + result = await wrapped(name="my_skill") + assert "instructions" in result diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_call_model.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_call_model.py new file mode 100644 index 00000000..e721ec37 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_call_model.py @@ -0,0 +1,302 @@ +"""Phase 2 — create_call_model_node shared factory tests. + +Pins the routing behaviour of the shared call_model node so that: +1. 
When the model returns a fenced code block → Command(goto=execute_node_name) +2. When the model returns plain text → Command(goto=END) with final_answer +3. When the step limit is exceeded → Command(goto=END) with error message +4. When classify_auto_continue fires → Command(goto="call_model") with "continue" +5. When returning from tool-approval → approval resumption takes priority + +All tests use a _TestAdapter with no-op hooks (Supervisor-equivalent +behaviour) so the tests cover only the shared logic, not adapter-specific +behaviours tested in later phases. +""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any, List, Optional +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from langchain_core.messages import BaseMessage, HumanMessage +from langgraph.graph import END +from langgraph.types import Command + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter + + +# ── Shared test adapter ──────────────────────────────────────────────────── + + +class _TestAdapter(CoreGraphAdapter): + messages_key = "chat_messages" + execute_node_name = "sandbox" + metadata_key = "cuga_lite_metadata" + + def get_messages(self, state: Any) -> List[BaseMessage]: + return list(state.chat_messages or []) + + def resolve_max_steps(self, state: Any, override: Optional[int]) -> int: + return override if override is not None else getattr(state, "_max_steps", 50) + + +# ── Test state factory ───────────────────────────────────────────────────── + + +def _make_state( + messages=None, + step_count=0, + max_steps=50, + prepared_prompt="You are a helpful agent.", + metadata=None, +): + vm = MagicMock() + vm.get_variable_names.return_value = [] + return SimpleNamespace( + chat_messages=messages or [HumanMessage(content="do task")], + step_count=step_count, + _max_steps=max_steps, + prepared_prompt=prepared_prompt, + cuga_lite_metadata=metadata or {}, + variables_storage=None, +
variable_counter_state=None, + variable_creation_order=None, + variables_manager=vm, + ) + + +# ── Mock helpers ─────────────────────────────────────────────────────────── + + +def _mock_response(content: str): + return SimpleNamespace(content=content, additional_kwargs={}) + + +def _mock_model(content: str): + model = MagicMock() + model.ainvoke = AsyncMock(return_value=_mock_response(content)) + return model + + +def _mock_settings(policy_enabled=False): + adv = SimpleNamespace(cuga_lite_max_steps=50) + policy = SimpleNamespace(enabled=policy_enabled) + return SimpleNamespace(advanced_features=adv, policy=policy) + + +# The factory under test — imported lazily so we see ImportError (RED) clearly. +def _get_factory(): + from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes import ( + create_call_model_node, + ) + + return create_call_model_node + + +# ── 1. Code path routes to execute_node_name ────────────────────────────── + + +@pytest.mark.asyncio +@patch( + "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + new_callable=AsyncMock, +) +async def test_code_path_routes_to_execute_node(mock_summarize): + mock_summarize.side_effect = lambda messages, *args, **kwargs: messages + + adapter = _TestAdapter() + state = _make_state() + model = _mock_model("```python\nprint('hi')\n```") + settings = _mock_settings() + + node = _get_factory()(adapter, model, settings) + result = await node(state, config=None) + + assert isinstance(result, Command) + assert result.goto == "sandbox" + assert result.update["script"] == "print('hi')" + assert result.update["step_count"] == 1 + assert result.update["chat_messages"][-1].content == "```python\nprint('hi')\n```" + + +# ── 2. 
No-code path routes to END ───────────────────────────────────────── + + +@pytest.mark.asyncio +@patch( + "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + new_callable=AsyncMock, +) +async def test_no_code_path_routes_to_end(mock_summarize): + mock_summarize.side_effect = lambda messages, *args, **kwargs: messages + + adapter = _TestAdapter() + state = _make_state() + model = _mock_model("The answer is 42.") + settings = _mock_settings() + + node = _get_factory()(adapter, model, settings) + result = await node(state, config=None) + + assert isinstance(result, Command) + assert result.goto == END + assert result.update["final_answer"] == "The answer is 42." + assert result.update["execution_complete"] is True + assert result.update["step_count"] == 1 + + +# ── 3. Step limit in code path ──────────────────────────────────────────── + + +@pytest.mark.asyncio +@patch( + "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + new_callable=AsyncMock, +) +async def test_step_limit_in_code_path_routes_to_end_with_error(mock_summarize): + mock_summarize.side_effect = lambda messages, *args, **kwargs: messages + + adapter = _TestAdapter() + # step_count=50 means new_step_count=51 which exceeds max_steps=50 + state = _make_state(step_count=50, max_steps=50) + model = _mock_model("```python\nprint('hi')\n```") + settings = _mock_settings() + + node = _get_factory()(adapter, model, settings) + result = await node(state, config=None) + + assert isinstance(result, Command) + assert result.goto == END + assert "Maximum step limit" in result.update.get( + "error", "" + ) or "Maximum step limit" in result.update.get("final_answer", "") + + +# ── 4. 
Step limit in no-code path ───────────────────────────────────────── + + +@pytest.mark.asyncio +@patch( + "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + new_callable=AsyncMock, +) +async def test_step_limit_in_no_code_path_routes_to_end_with_error(mock_summarize): + mock_summarize.side_effect = lambda messages, *args, **kwargs: messages + + adapter = _TestAdapter() + state = _make_state(step_count=50, max_steps=50) + model = _mock_model("The answer is 42.") + settings = _mock_settings() + + node = _get_factory()(adapter, model, settings) + result = await node(state, config=None) + + assert isinstance(result, Command) + assert result.goto == END + assert "Maximum step limit" in result.update.get( + "error", "" + ) or "Maximum step limit" in result.update.get("final_answer", "") + + +# ── 5. Auto-continue loops back to call_model ───────────────────────────── + + +class _AutoContinueAdapter(_TestAdapter): + async def classify_auto_continue( + self, state: Any, model: Any, content: str, reasoning: Optional[str] + ) -> bool: + return True # always continue + + +@pytest.mark.asyncio +@patch( + "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + new_callable=AsyncMock, +) +async def test_auto_continue_loops_back_to_call_model(mock_summarize): + mock_summarize.side_effect = lambda messages, *args, **kwargs: messages + + adapter = _AutoContinueAdapter() + state = _make_state() + model = _mock_model("Let me think about this first.") + settings = _mock_settings() + + node = _get_factory()(adapter, model, settings) + result = await node(state, config=None) + + assert isinstance(result, Command) + assert result.goto == "call_model" + msgs = result.update["chat_messages"] + assert msgs[-1].content == "continue" + assert result.update["execution_complete"] is False + + +# ── 6. 
messages_key and execute_node_name are respected ─────────────────── + + +class _SupervisorLikeAdapter(_TestAdapter): + messages_key = "supervisor_chat_messages" + execute_node_name = "execute_agent_tool" + metadata_key = "supervisor_metadata" + + def get_messages(self, state: Any) -> List[BaseMessage]: + return list(getattr(state, "supervisor_chat_messages", None) or []) + + +@pytest.mark.asyncio +@patch( + "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + new_callable=AsyncMock, +) +async def test_adapter_messages_key_and_execute_node_name_respected(mock_summarize): + mock_summarize.side_effect = lambda messages, *args, **kwargs: messages + + adapter = _SupervisorLikeAdapter() + state = SimpleNamespace( + supervisor_chat_messages=[HumanMessage(content="orchestrate")], + step_count=0, + _max_steps=50, + prepared_prompt="Supervisor prompt.", + supervisor_metadata={}, + variables_storage=None, + variable_counter_state=None, + variable_creation_order=None, + variables_manager=MagicMock(get_variable_names=lambda: []), + ) + model = _mock_model("```python\nawait delegate_to_agent('do x')\n```") + settings = _mock_settings() + + node = _get_factory()(adapter, model, settings) + result = await node(state, config=None) + + assert result.goto == "execute_agent_tool" + assert "supervisor_chat_messages" in result.update + assert "execute_agent_tool" == result.goto + + +# ── 7. 
Configurable llm overrides base_model ────────────────────────────── + + +@pytest.mark.asyncio +@patch( + "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + new_callable=AsyncMock, +) +async def test_configurable_llm_overrides_base_model(mock_summarize): + mock_summarize.side_effect = lambda messages, *args, **kwargs: messages + + adapter = _TestAdapter() + state = _make_state() + base_model = _mock_model("should not be called") + override_model = _mock_model("The answer is 7.") + settings = _mock_settings() + + node = _get_factory()(adapter, base_model, settings) + config = {"configurable": {"llm": override_model}} + result = await node(state, config=config) + + # override_model was invoked, base_model was not + override_model.ainvoke.assert_called_once() + base_model.ainvoke.assert_not_called() + assert result.update["final_answer"] == "The answer is 7." diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_graph_builder.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_graph_builder.py new file mode 100644 index 00000000..4234eda5 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_graph_builder.py @@ -0,0 +1,150 @@ +"""Phase 3 — build_agent_graph shared graph builder tests. + +Pins that: +1. The returned (uncompiled) graph has the expected node names (prepare, call_model, + and the adapter's execute_node_name). +2. START → prepare → call_model edges exist. +3. The graph compiles without error given a valid state class and mock nodes. +4. Different execute_node_name values from the adapter are reflected in the graph. 
+""" + +from __future__ import annotations + +from typing import Any, List, Optional + +from langchain_core.messages import BaseMessage +from pydantic import BaseModel + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter + + +# ── Minimal adapter & state for graph construction ───────────────────────── + + +class _MinimalAdapter(CoreGraphAdapter): + messages_key = "chat_messages" + execute_node_name = "sandbox" + + def get_messages(self, state: Any) -> List[BaseMessage]: + return [] + + def resolve_max_steps(self, state: Any, override: Optional[int]) -> int: + return 50 + + +class _SupervisorAdapter(_MinimalAdapter): + execute_node_name = "execute_agent_tool" + messages_key = "supervisor_chat_messages" + + +class _MockState(BaseModel): + chat_messages: list = [] + step_count: int = 0 + prepared_prompt: str = "" + cuga_lite_metadata: dict = {} + variables_storage: dict = {} + + +def _noop_node(): + async def node(state, config=None): + from langgraph.types import Command + from langgraph.graph import END + + return Command(goto=END, update={"final_answer": "done", "execution_complete": True}) + + return node + + +def _get_builder(): + from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_graph import build_agent_graph + + return build_agent_graph + + +# ── 1. Graph compiles with Lite-like adapter ─────────────────────────────── + + +def test_build_agent_graph_compiles_with_lite_adapter(): + build = _get_builder() + adapter = _MinimalAdapter() + + graph = build( + adapter=adapter, + state_class=_MockState, + prepare_node=_noop_node(), + call_model_node=_noop_node(), + execute_node=_noop_node(), + ) + + assert graph is not None + + +# ── 2. 
Graph has expected node names (Lite) ──────────────────────────────── + + +def test_built_graph_has_prepare_and_call_model_nodes(): + build = _get_builder() + adapter = _MinimalAdapter() + + graph = build( + adapter=adapter, + state_class=_MockState, + prepare_node=_noop_node(), + call_model_node=_noop_node(), + execute_node=_noop_node(), + ) + + node_names = set(graph.nodes.keys()) + assert "prepare" in node_names + assert "call_model" in node_names + assert "sandbox" in node_names # adapter.execute_node_name + + +# ── 3. Graph has expected node names (Supervisor-like) ──────────────────── + + +def test_built_graph_uses_adapter_execute_node_name(): + build = _get_builder() + adapter = _SupervisorAdapter() + + class _SupervisorState(BaseModel): + supervisor_chat_messages: list = [] + step_count: int = 0 + prepared_prompt: str = "" + supervisor_metadata: dict = {} + variables_storage: dict = {} + + graph = build( + adapter=adapter, + state_class=_SupervisorState, + prepare_node=_noop_node(), + call_model_node=_noop_node(), + execute_node=_noop_node(), + ) + + node_names = set(graph.nodes.keys()) + assert "execute_agent_tool" in node_names + assert "sandbox" not in node_names + + +# ── 4. 
Graph returns an uncompiled StateGraph (caller compiles with checkpointer) ─ + + +def test_built_graph_is_uncompiled_state_graph(): + from langgraph.graph import StateGraph + + build = _get_builder() + adapter = _MinimalAdapter() + + graph = build( + adapter=adapter, + state_class=_MockState, + prepare_node=_noop_node(), + call_model_node=_noop_node(), + execute_node=_noop_node(), + ) + + assert isinstance(graph, StateGraph) + assert hasattr(graph, "compile") + compiled = graph.compile() + assert hasattr(compiled, "ainvoke") diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_feature_parity.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_feature_parity.py new file mode 100644 index 00000000..e2915c4c --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_feature_parity.py @@ -0,0 +1,180 @@ +"""Phase 11 — Supervisor feature-parity regression tests. + +These tests pin behaviors that were added to both Lite and Supervisor during +the modular sandbox refactor but were not directly covered by earlier test +slices: + +1. Real ``_SUPERVISOR_LOOP_ADAPTER.get_variable_manager`` returns + ``state.supervisor_variables_manager`` (Phase-9 coupling fix). +2. ``filter(None, [...])`` join pattern used in both graphs' prepare nodes + produces the expected combined special-instructions string. +3. ``InvokeResult.variables`` round-trips through ``VariableBridge.extract_values`` + so the variable dict exposed to callers matches what the sub-agent stored. +4. ``split_execution_note`` is non-empty exactly when the plan is split-active, + ensuring both graphs' ``filter(None, [...])`` joins include it correctly. +5. ``VariableBridge.bridge`` skips entries with ``None`` values so the supervisor + namespace only contains non-null sub-agent variables. +""" + +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + + +# ── 1. 
Real _SUPERVISOR_LOOP_ADAPTER.get_variable_manager ───────────────── + + +def test_real_supervisor_adapter_get_variable_manager_returns_supervisor_vm(): + """Phase-9 pin: _SUPERVISOR_LOOP_ADAPTER.get_variable_manager must return + state.supervisor_variables_manager (not state.variables_manager). + + If this breaks, CodeExecutor will silently write sub-task variables into + the root agent's manager instead of the supervisor's. + """ + from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_graph import ( + _SUPERVISOR_LOOP_ADAPTER, + ) + + sentinel = object() + state = SimpleNamespace(supervisor_variables_manager=sentinel) + assert _SUPERVISOR_LOOP_ADAPTER.get_variable_manager(state) is sentinel + + +# ── 2. filter(None, [...]) join pattern ─────────────────────────────────── + + +def test_special_instructions_join_all_present(): + """All three components (base, skills_section, split_note) are joined with \\n\\n.""" + base = "Do X." + skills = "foo" + note = "Split active." + result = "\n\n".join(filter(None, [base, skills, note])) or None + assert result == "Do X.\n\nfoo\n\nSplit active." + + +def test_special_instructions_join_skips_none_and_empty(): + """None and empty-string components are dropped from the join.""" + base = "Do X." + skills = "" # empty → filtered out + note = None # None → filtered out + result = "\n\n".join(filter(None, [base, skills, note])) or None + assert result == "Do X." + + +def test_special_instructions_join_all_empty_returns_none(): + """When all components are empty, the join returns None (not empty string).""" + result = "\n\n".join(filter(None, [None, "", None])) or None + assert result is None + + +def test_special_instructions_join_only_note_present(): + """Only split note (no base or skills) still produces a valid string.""" + note = "Split active." + result = "\n\n".join(filter(None, [None, "", note])) or None + assert result == "Split active." + + +# ── 3. 
InvokeResult.variables round-trip ───────────────────────────────── + + +def test_invoke_result_variables_round_trips_via_extract_values(): + """Variables stored in variables_storage and extracted by VariableBridge + are identical to what InvokeResult.variables carries. + + This pins the contract between CugaAgent.invoke() and the bridge wiring. + """ + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.sdk import InvokeResult + + # Simulate what the graph result dict contains + variables_storage = { + "order_id": {"value": "ORD-42", "type": "str", "description": ""}, + "total": {"value": 199.99, "type": "float", "description": ""}, + } + extracted = VariableBridge.extract_values(variables_storage) + + # Build InvokeResult as CugaAgent.invoke() would + result = InvokeResult(answer="done", variables=extracted) + + assert result.variables["order_id"] == "ORD-42" + assert result.variables["total"] == pytest.approx(199.99) + # Metadata (type, description) is stripped — only raw values survive + assert set(result.variables.keys()) == {"order_id", "total"} + + +def test_invoke_result_variables_empty_storage_gives_empty_dict(): + """Empty variables_storage → InvokeResult.variables == {}.""" + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.sdk import InvokeResult + + result = InvokeResult(answer="done", variables=VariableBridge.extract_values({})) + assert result.variables == {} + + +# ── 4. split_execution_note in filter(None, ...) 
context ───────────────── + + +def test_split_note_absent_means_join_skips_it(): + """When split execution is not active, split_execution_note returns '', + and the join correctly omits it.""" + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ( + ExecutionPlan, + split_execution_note, + ) + + plan = ExecutionPlan( + requested_backend="local", + python_backend="local", + shell_backend="none", + filesystem_backend="none", + ) + note = split_execution_note(plan) + base = "Base instructions." + result = "\n\n".join(filter(None, [base, note])) or None + assert result == "Base instructions." + + +def test_split_note_present_means_join_includes_it(): + """When split execution is active, the note is non-empty and joins.""" + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ( + ExecutionPlan, + split_execution_note, + ) + + plan = ExecutionPlan( + requested_backend="local", + python_backend="local", + shell_backend="opensandbox", + filesystem_backend="none", + ) + note = split_execution_note(plan) + base = "Base." + result = "\n\n".join(filter(None, [base, note])) or None + assert result is not None + assert "Base." in result + assert note in result + + +# ── 5. VariableBridge.bridge skips None values ──────────────────────────── + + +def test_bridge_skips_none_values(): + """bridge() must not write None values into the target VM. + + None-valued variables are noise from sub-agents that never assigned the + variable; writing them pollutes the supervisor namespace with un-usable + placeholders. 
+ """ + from cuga.backend.cuga_graph.state.agent_state import VariablesManager + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + + target = VariablesManager() + bridged = VariableBridge.bridge({"good": 42, "bad": None}, target) + + assert "good" in target.get_variable_names() + assert target.get_variable("good") == 42 + # None-valued entries must be silently dropped, not written + assert "bad" not in target.get_variable_names() + assert "bad" not in bridged diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_tool_provider.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_tool_provider.py new file mode 100644 index 00000000..e0b38dba --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_tool_provider.py @@ -0,0 +1,153 @@ +"""Phase 7 — Supervisor tool_provider API + wiring tests. + +Pins two behaviors: +1. ``create_cuga_supervisor_graph`` accepts ``tool_provider`` (backward-compat + default ``None`` + a real provider object). +2. When a tool_provider is supplied, ``get_all_tools()`` results are wired: + - as async callables in the agent_tools_context (via make_tool_awaitable) + - as prompt dicts for the Jinja template (via prompt_tool_dicts) + +The nested closure inside ``_create_supervisor_conversational_graph`` cannot be +isolated without full graph construction, so we test the *mechanism* in +isolation: the same ``make_tool_awaitable`` + ``prompt_tool_dicts`` path that +the wiring will use. 
+""" + +from __future__ import annotations + +import inspect +from typing import List +from unittest.mock import MagicMock + +import pytest +from langchain_core.tools import StructuredTool + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import make_tool_awaitable +from cuga.backend.cuga_graph.nodes.cuga_agent_core.runtime_tools import prompt_tool_dicts +from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface + + +# ── helpers ──────────────────────────────────────────────────────────────── + + +def _make_structured_tool(name: str) -> StructuredTool: + async def _fn(query: str) -> str: + return f"{name}: {query}" + + return StructuredTool.from_function( + coroutine=_fn, + name=name, + description=f"Tool {name}", + ) + + +class FakeToolProvider(ToolProviderInterface): + """Minimal in-process provider that returns a fixed list of tools.""" + + def __init__(self, tools: List[StructuredTool]) -> None: + self._tools = tools + self.get_all_tools_call_count = 0 + + async def initialize(self) -> None: + pass + + async def get_apps(self): + return [] + + async def get_tools(self, app_name: str): + return self._tools + + async def get_all_tools(self) -> List[StructuredTool]: + self.get_all_tools_call_count += 1 + return list(self._tools) + + +# ── API surface ──────────────────────────────────────────────────────────── + + +def test_create_cuga_supervisor_graph_accepts_tool_provider_none(): + """tool_provider=None (default) must not raise at graph construction time.""" + from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_graph import ( + create_cuga_supervisor_graph, + ) + + model = MagicMock() + model.bind_tools = MagicMock(return_value=model) + agents = {} + + # Should not raise — backward-compatible default + graph = create_cuga_supervisor_graph(model, agents, tool_provider=None) + assert graph is not None + + +def test_create_cuga_supervisor_graph_accepts_tool_provider_instance(): + """Passing 
a FakeToolProvider must not raise at graph construction time.""" + from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_graph import ( + create_cuga_supervisor_graph, + ) + + model = MagicMock() + model.bind_tools = MagicMock(return_value=model) + agents = {} + provider = FakeToolProvider([_make_structured_tool("search")]) + + graph = create_cuga_supervisor_graph(model, agents, tool_provider=provider) + assert graph is not None + + +# ── mechanism: make_tool_awaitable wraps provider tools ─────────────────── + + +@pytest.mark.asyncio +async def test_provider_tool_becomes_awaitable_via_make_tool_awaitable(): + """StructuredTool from provider is callable as an async function after wrapping.""" + tool = _make_structured_tool("lookup") + tool_func = tool.coroutine or tool.func + wrapped = make_tool_awaitable(tool_func) + assert inspect.iscoroutinefunction(wrapped) + result = await wrapped(query="hello") + assert result == "lookup: hello" + + +def test_provider_tools_produce_valid_prompt_dicts(): + """prompt_tool_dicts converts provider StructuredTools to Supervisor template format.""" + tools = [_make_structured_tool("search"), _make_structured_tool("fetch")] + dicts = prompt_tool_dicts(tools) + + assert len(dicts) == 2 + for d in dicts: + assert "name" in d + assert "description" in d + assert "params_str" in d + assert "params_doc" in d + assert "response_doc" in d + + +def test_provider_tools_are_keyed_by_name_in_context(): + """When tools are loaded from provider, agent_tools_context uses tool.name as key.""" + tools = [_make_structured_tool("alpha"), _make_structured_tool("beta")] + agent_tools_context = {} + + for t in tools: + tool_func = t.coroutine if getattr(t, "coroutine", None) else getattr(t, "func", None) + if tool_func: + agent_tools_context[t.name] = make_tool_awaitable(tool_func) + + assert "alpha" in agent_tools_context + assert "beta" in agent_tools_context + assert inspect.iscoroutinefunction(agent_tools_context["alpha"]) + assert 
inspect.iscoroutinefunction(agent_tools_context["beta"]) + + +# ── mechanism: get_all_tools is awaited ─────────────────────────────────── + + +@pytest.mark.asyncio +async def test_fake_provider_get_all_tools_returns_tools(): + """FakeToolProvider.get_all_tools() returns the injected list (sanity check).""" + tools = [_make_structured_tool("ping")] + provider = FakeToolProvider(tools) + result = await provider.get_all_tools() + assert len(result) == 1 + assert result[0].name == "ping" + assert provider.get_all_tools_call_count == 1 diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_tool_approval_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_tool_approval_adapter.py new file mode 100644 index 00000000..65ead1b1 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_tool_approval_adapter.py @@ -0,0 +1,170 @@ +"""Behavior-pinning tests for the adapter-ized ToolApprovalHandler. + +The handler used to be hard-coupled to ``CugaLiteState`` (``cuga_lite_metadata``, +``chat_messages``, ``goto="sandbox"``, ``sender="CugaLite"``). It now takes a +``CoreGraphAdapter`` whose approval seams default to exactly the legacy Lite +values, so Lite behavior must be byte-identical. Expected values below are +the *current* Lite behavior, locked before/after the refactor. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace + +from langchain_core.messages import AIMessage, HumanMessage +from langgraph.graph import END + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler + + +class LiteLikeAdapter(CoreGraphAdapter): + """Mirrors _CugaLiteLoopAdapter's seam values (the defaults).""" + + messages_key = "chat_messages" + + def get_messages(self, state): + return state.chat_messages + + def resolve_max_steps(self, state, override): + return override if override is not None else 999 + + +class SupLikeAdapter(CoreGraphAdapter): + """Mirrors _CugaSupervisorLoopAdapter's approval seam overrides.""" + + messages_key = "supervisor_chat_messages" + metadata_key = "supervisor_metadata" + execute_node_name = "execute_agent_tool" + sender_name = "CugaSupervisor" + + def get_messages(self, state): + return state.supervisor_chat_messages or [] + + def resolve_max_steps(self, state, override): + return override if override is not None else 999 + + +# ─── CoreGraphAdapter approval-seam defaults (must equal legacy Lite) ──────── + + +def test_default_approval_seams_are_lite_values(): + a = LiteLikeAdapter() + assert a.metadata_key == "cuga_lite_metadata" + assert a.execute_node_name == "sandbox" + assert a.sender_name == "CugaLite" + st = SimpleNamespace(cuga_lite_metadata={"x": 1}) + assert a.get_metadata(st) == {"x": 1} + a.set_metadata(st, {"y": 2}) + assert st.cuga_lite_metadata == {"y": 2} + # absent -> {} + assert a.get_metadata(SimpleNamespace()) == {} + + +def test_supervisor_adapter_overrides_seams(): + a = SupLikeAdapter() + assert a.metadata_key == "supervisor_metadata" + assert a.execute_node_name == "execute_agent_tool" + assert a.sender_name == "CugaSupervisor" + st = SimpleNamespace(supervisor_metadata={"p": 1}) + assert a.get_metadata(st) == {"p": 1} + a.set_metadata(st, {"q": 9}) 
+ assert st.supervisor_metadata == {"q": 9} + + +# ─── pure handler methods (adapter-parameterized) ─────────────────────────── + + +def test_should_skip_and_is_returning_from_approval(): + a = LiteLikeAdapter() + yes = SimpleNamespace(cuga_lite_metadata={"user_approved": True}) + no = SimpleNamespace(cuga_lite_metadata={}) + assert ToolApprovalHandler.should_skip_policy_check(a, yes) is True + assert ToolApprovalHandler.should_skip_policy_check(a, no) is False + assert ToolApprovalHandler.is_returning_from_approval(a, yes) is True + assert ToolApprovalHandler.is_returning_from_approval(a, no) is False + + +def test_extract_approved_code_reads_via_adapter_messages(): + a = LiteLikeAdapter() + st = SimpleNamespace( + chat_messages=[ + HumanMessage(content="do it"), + AIMessage(content="```python\nprint('x')\n```"), + ] + ) + assert ToolApprovalHandler.extract_approved_code(a, st) == "print('x')" + st2 = SimpleNamespace(chat_messages=[HumanMessage(content="no ai")]) + assert ToolApprovalHandler.extract_approved_code(a, st2) is None + + +def test_clean_approval_metadata_strips_known_fields(): + md = { + "keep": 1, + "approval_required": True, + "user_approved": True, + "required_tools": [], + "required_apps": [], + "full_code": "x", + "code_preview": ["a"], + } + assert ToolApprovalHandler.clean_approval_metadata(md) == {"keep": 1} + + +def test_handle_denial_lite_shape(): + a = LiteLikeAdapter() + denied = SimpleNamespace(cuga_lite_metadata={"user_approved": False}, step_count=4) + cmd = ToolApprovalHandler.handle_denial(a, denied) + assert cmd.goto == END + assert cmd.update == { + "execution_complete": True, + "final_answer": "Execution cancelled by user.", + "step_count": 5, + } + assert ToolApprovalHandler.handle_denial(a, SimpleNamespace(cuga_lite_metadata={})) is None + + +def test_handle_approval_resumption_routes_to_adapter_execute_node(): + a = LiteLikeAdapter() + st = SimpleNamespace( + chat_messages=[AIMessage(content="```python\nprint('go')\n```")], + 
cuga_lite_metadata={"user_approved": True, "policy_name": "P"}, + step_count=2, + ) + cmd = ToolApprovalHandler.handle_approval_resumption(a, st) + assert cmd.goto == "sandbox" + assert cmd.update["script"] == "print('go')" + assert cmd.update["step_count"] == 3 + # cleaned metadata written under the adapter's metadata_key + assert "user_approved" not in cmd.update["cuga_lite_metadata"] + assert cmd.update["cuga_lite_metadata"]["policy_name"] == "P" + + +def test_real_supervisor_adapter_has_correct_approval_seams(): + """The real _CugaSupervisorLoopAdapter must carry supervisor seam values + (not the Lite defaults) once Slice B wires approval into Supervisor.""" + from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_graph import ( + _SUPERVISOR_LOOP_ADAPTER, + ) + + a = _SUPERVISOR_LOOP_ADAPTER + assert a.metadata_key == "supervisor_metadata" + assert a.execute_node_name == "execute_agent_tool" + assert a.sender_name == "CugaSupervisor" + st = SimpleNamespace(supervisor_metadata={"k": 1}) + assert a.get_metadata(st) == {"k": 1} + a.set_metadata(st, {"k": 2}) + assert st.supervisor_metadata == {"k": 2} + + +def test_handle_approval_resumption_supervisor_seams(): + a = SupLikeAdapter() + st = SimpleNamespace( + supervisor_chat_messages=[AIMessage(content="```python\nprint('go')\n```")], + supervisor_metadata={"user_approved": True}, + step_count=0, + ) + cmd = ToolApprovalHandler.handle_approval_resumption(a, st) + assert cmd.goto == "execute_agent_tool" + assert "supervisor_metadata" in cmd.update + assert cmd.update["script"] == "print('go')" diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_variable_bridge.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_variable_bridge.py new file mode 100644 index 00000000..655adcad --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_variable_bridge.py @@ -0,0 +1,149 @@ +"""Phase 8 — VariableBridge tests. + +Pins four behaviors: +1.
VariableBridge.extract_values converts variables_storage format → {name: value}. +2. VariableBridge.bridge copies values into a target VariablesManager. +3. InvokeResult carries a `variables` field (default empty, populated from + sub-agent graph state after invocation). +4. Mechanism test: delegate_to_agent bridges variables via the shared VM ref + that execute_agent_tool populates before each code run. +""" + +from __future__ import annotations + +from typing import Any, List + +from cuga.backend.cuga_graph.state.agent_state import VariablesManager + + +# ── VariableBridge utility ───────────────────────────────────────────────── + + +def test_extract_values_returns_name_value_dict(): + """extract_values strips metadata, leaving {name: raw_value}.""" + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + + storage = { + "amount": {"value": 99, "description": "desc", "type": "int", "created_at": "...", "count_items": 0}, + "name": {"value": "Alice", "description": "", "type": "str", "created_at": "...", "count_items": 5}, + } + result = VariableBridge.extract_values(storage) + assert result == {"amount": 99, "name": "Alice"} + + +def test_extract_values_skips_entries_without_value_key(): + """Malformed storage entries (no 'value' key) are silently skipped.""" + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + + storage = { + "good": {"value": 42, "type": "int"}, + "bad": {"type": "int"}, # no 'value' + } + result = VariableBridge.extract_values(storage) + assert result == {"good": 42} + assert "bad" not in result + + +def test_extract_values_empty_storage_returns_empty(): + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + + assert VariableBridge.extract_values({}) == {} + + +def test_bridge_copies_values_into_target_manager(): + """bridge() writes each value into the target VariablesManager under its name.""" + from 
cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + + target = VariablesManager() + bridged = VariableBridge.bridge({"x": 10, "y": "hello"}, target) + + assert "x" in target.get_variable_names() + assert "y" in target.get_variable_names() + assert target.get_variable("x") == 10 + assert target.get_variable("y") == "hello" + assert set(bridged) == {"x", "y"} + + +def test_bridge_empty_source_returns_empty_list_and_leaves_manager_unchanged(): + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + + target = VariablesManager() + target.add_variable(1, name="pre_existing") + bridged = VariableBridge.bridge({}, target) + + assert bridged == [] + assert "pre_existing" in target.get_variable_names() + + +def test_bridge_description_prefix_is_applied(): + """bridge() stores variables with the given description prefix.""" + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + + target = VariablesManager() + VariableBridge.bridge({"result": 7}, target, description_prefix="from customer_agent") + meta = target.variables.get("result") + assert meta is not None + assert "from customer_agent" in meta.description + + +# ── InvokeResult.variables field ────────────────────────────────────────── + + +def test_invoke_result_has_variables_field_defaulting_to_empty(): + """InvokeResult.variables exists and defaults to an empty dict.""" + from cuga.sdk import InvokeResult + + r = InvokeResult(answer="done") + assert hasattr(r, "variables") + assert r.variables == {} + + +def test_invoke_result_variables_accepts_name_value_dict(): + """InvokeResult.variables can be set to a {name: value} dict.""" + from cuga.sdk import InvokeResult + + r = InvokeResult(answer="ok", variables={"amount": 100, "name": "Alice"}) + assert r.variables["amount"] == 100 + assert r.variables["name"] == "Alice" + + +# ── Mechanism: shared VM ref bridges variables from sub-agent ───────────── + + +def 
test_shared_vm_ref_allows_delegate_to_write_into_supervisor_vm(): + """VariableBridge.bridge called with a ref populated at execution time writes to the target VM. + + This is the mechanism test for the _shared_vm_ref[0] pattern: + - execute_agent_tool sets _shared_vm_ref[0] = state.supervisor_variables_manager + - delegate_to_agent reads _shared_vm_ref[0] and calls VariableBridge.bridge(...) + - The target VM accumulates the bridged variables + """ + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + + _shared_vm_ref: List[Any] = [None] + + supervisor_vm = VariablesManager() + _shared_vm_ref[0] = supervisor_vm # simulate execute_agent_tool setting the ref + + # simulate delegate_to_agent bridging after sub-agent returns variables + sub_agent_vars = {"order_id": "ORD-42", "total": 199.99} + if _shared_vm_ref[0] is not None: + VariableBridge.bridge(sub_agent_vars, _shared_vm_ref[0], description_prefix="from order_agent") + + assert "order_id" in supervisor_vm.get_variable_names() + assert supervisor_vm.get_variable("order_id") == "ORD-42" + assert "total" in supervisor_vm.get_variable_names() + + +def test_shared_vm_ref_none_skips_bridge_gracefully(): + """When _shared_vm_ref[0] is None (before first execute), bridge is skipped.""" + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + + _shared_vm_ref: List[Any] = [None] + sub_agent_vars = {"result": "ok"} + + # Guard: only bridge when ref is set (not None) + if _shared_vm_ref[0] is not None: + VariableBridge.bridge(sub_agent_vars, _shared_vm_ref[0]) + + # No exception; nothing to assert (VM was never written) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/variable_bridge.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/variable_bridge.py new file mode 100644 index 00000000..aa4b37f3 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/variable_bridge.py @@ -0,0 +1,59 @@ +"""VariableBridge — 
cross-agent variable handoff utility. + +Copies variables computed by a sub-agent (carried in ``InvokeResult.variables``) +into the Supervisor's ``supervisor_variables_manager``. + +The bridge is driven by a *shared mutable reference* (``_shared_vm_ref: List``) +that ``execute_agent_tool`` refreshes to the current state's VM before each code +execution. ``delegate_to_agent`` reads from that ref after the sub-agent returns, +so variables land in the correct state snapshot even though the delegation closure +is built at graph-prepare time. +""" + +from __future__ import annotations + +from typing import Any, Dict, List + + +class VariableBridge: + """Utility for copying variables between agents.""" + + @staticmethod + def extract_values(variables_storage: Dict[str, Dict[str, Any]]) -> Dict[str, Any]: + """Convert ``variables_storage`` serialised format → ``{name: raw_value}``. + + Entries that lack a ``'value'`` key are silently skipped so callers + never need to guard against malformed storage. + """ + return {name: meta["value"] for name, meta in variables_storage.items() if "value" in meta} + + @staticmethod + def bridge( + source_values: Dict[str, Any], + target_manager: Any, + description_prefix: str = "bridged", + ) -> List[str]: + """Copy ``{name: value}`` pairs into *target_manager*. + + Args: + source_values: Plain ``{name: value}`` dict (e.g. from + ``extract_values`` or ``InvokeResult.variables``). + target_manager: Any ``VariablesManager``-compatible object that + implements ``add_variable(value, name, description)``. + description_prefix: Prepended to each variable's description so + the supervisor knows where the value originated. + + Returns: + List of variable names that were written (entries whose value is ``None`` are skipped and not listed).
+ """ + bridged: List[str] = [] + for name, value in source_values.items(): + if value is None: + continue + target_manager.add_variable( + value, + name=name, + description=f"{description_prefix}: {name}", + ) + bridged.append(name) + return bridged diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py new file mode 100644 index 00000000..70b89d84 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py @@ -0,0 +1,244 @@ +"""AgentGraphAdapter — CoreGraphAdapter implementation for CugaLite (single-agent). + +Provides all hook overrides that the shared ``create_call_model_node`` factory +delegates to for Lite-specific behaviour: + +- Few-shot messages, PI injection, todos system block +- normalize_response: normalize_assistant_text + tool-call code recovery +- Tracker side-effects, Langfuse callbacks +- Metadata cleanup (_clean_empty_response_retry_meta) +- NL auto-continue via classify_nl_auto_continue + +Also houses the format_task_todos_system_block / format_current_plan_section +helpers that were previously defined in cuga_lite_graph.py; cuga_lite_graph.py +imports them from here in Phase 6. 
+""" + +from __future__ import annotations + +import json +from typing import Any, Dict, List, Optional, Tuple + +from langchain_core.messages import BaseMessage +from loguru import logger + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_lite.nl_auto_continue_classifier import ( + classify_nl_auto_continue, + normalize_assistant_text, +) +from cuga.backend.llm.errors import extract_code_from_tool_use_failed +from cuga.config import settings + + +# ── Helpers (moved from cuga_lite_graph.py) ──────────────────────────────── + + +def _clean_empty_response_retry_meta(meta: Optional[Dict[str, Any]]) -> Dict[str, Any]: + m = {**(meta or {})} + m.pop("_empty_response_correction", None) + return m + + +def format_task_todos_system_block(todos: List[Dict[str, str]]) -> str: + if not todos: + return "" + lines = [ + "", + "---", + "", + "## Current task todos", + "", + "Execution only prints **Todos updated** after each change; use this list as the source of truth.", + "", + ] + for i, item in enumerate(todos, start=1): + status = item.get("status", "pending") + text = item.get("text", "") + lines.append(f"{i}. 
**[{status}]** {text}") + lines.append("") + return "\n".join(lines) + + +def format_current_plan_section(task_todos: List[Dict[str, Any]]) -> str: + lines = ["## Current Plan", ""] + for item in task_todos: + text = str(item.get("text", "")).strip() + status = str(item.get("status", "pending")).strip() + lines.append(f"- **[{status}]** {text}") + return "\n".join(lines) + "\n" + + +def _tool_call_kwarg_literal(value: Any) -> str: + if isinstance(value, str): + return json.dumps(value, ensure_ascii=False) + return repr(value) + + +def _extract_code_from_response_tool_calls(response: Any) -> Optional[str]: + """Recover fenced Python from AIMessage.tool_calls when content is empty.""" + tool_calls = getattr(response, "tool_calls", None) or ( + getattr(response, "additional_kwargs", None) or {} + ).get("tool_calls") + if not tool_calls: + return None + + tc = tool_calls[0] + if not isinstance(tc, dict): + return None + + name = tc.get("name") or (tc.get("function") or {}).get("name") + args = tc.get("args") or (tc.get("function") or {}).get("arguments") or {} + if isinstance(args, str): + try: + args = json.loads(args) + except json.JSONDecodeError: + args = {} + + if not name: + return None + + args_str = ", ".join( + f"{k}={_tool_call_kwarg_literal(v)}" for k, v in (args if isinstance(args, dict) else {}).items() + ) + return f"```python\nresult = await {name}({args_str})\nprint(result)\n```" + + +# ── AgentGraphAdapter ────────────────────────────────────────────────────── + + +class AgentGraphAdapter(CoreGraphAdapter): + """CoreGraphAdapter implementation for the CugaLite single-agent graph. + + Overrides all call_model hooks that differ from the no-op defaults: + few-shot messages, PI, todos, normalize, tracker, callbacks, metadata + cleanup, and NL auto-continue. 
+ """ + + messages_key: str = "chat_messages" + execute_node_name: str = "sandbox" + metadata_key: str = "cuga_lite_metadata" + sender_name: str = "CugaLite" + + def __init__( + self, + *, + tracker: Any, + base_callbacks: Optional[List[Any]], + task_todos_ref: List[Dict[str, str]], + tools_context_ref: Optional[Dict[str, Any]], + base_tool_provider: Any, + ) -> None: + self._tracker = tracker + self._base_callbacks = base_callbacks or [] + self._task_todos_ref = task_todos_ref + self._tools_context_ref = tools_context_ref + self._base_tool_provider = base_tool_provider + + # ── Abstract method implementations ─────────────────────────────────── + + def get_messages(self, state: Any) -> List[BaseMessage]: + return list(state.chat_messages or []) + + def resolve_max_steps(self, state: Any, override: Optional[int]) -> int: + if override is not None: + return override + return ( + state.cuga_lite_max_steps + if getattr(state, "cuga_lite_max_steps", None) is not None + else getattr(settings.advanced_features, "cuga_lite_max_steps", 50) + ) + + # ── Pre-invocation hook overrides ───────────────────────────────────── + + def get_few_shot_messages(self, state: Any) -> List[Any]: + return list(state.mcp_few_shot_messages or []) + + def get_pi(self, state: Any) -> Optional[str]: + return getattr(state, "pi", None) + + def prepare_system_content(self, state: Any, configurable: dict, base_prompt: str) -> str: + if self._task_todos_ref: + return base_prompt + format_task_todos_system_block(self._task_todos_ref) + task_todos = getattr(state, "task_todos", None) + if task_todos: + return base_prompt + format_current_plan_section(task_todos) + return base_prompt + + def get_tracker(self) -> Any: + return self._tracker + + def get_invoke_config(self, configurable: dict) -> dict: + callbacks = configurable.get("callbacks", self._base_callbacks) + return {"callbacks": callbacks} + + async def ainvoke_model(self, bound: Any, messages: list, invoke_config: dict) -> Any: + try: + 
return await bound.ainvoke(messages, config=invoke_config) + except Exception as exc: + code = extract_code_from_tool_use_failed(exc) + if code: + logger.warning( + "Model attempted tool call without tools bound (tool_use_failed). " + "Using generated code in sandbox" + ) + + class _FakeResponse: + content = f"```python\n{code}\n```" + additional_kwargs: dict = {} + + return _FakeResponse() + raise + + async def resolve_bind_tools(self, state: Any, active_model: Any, configurable: dict) -> Any: + from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import ( + resolve_model_with_bind_tools, + ) + + try: + return await resolve_model_with_bind_tools( + active_model, + configurable=configurable, + tools_context_ref=self._tools_context_ref, + tool_provider=self._base_tool_provider, + ) + except Exception as exc: + logger.warning("AgentGraphAdapter.resolve_bind_tools failed: %s", exc) + return None + + # ── Post-invocation hook overrides ──────────────────────────────────── + + def normalize_response(self, response: Any) -> Tuple[str, Optional[str]]: + content = normalize_assistant_text(response.content) + if not content: + tool_code = _extract_code_from_response_tool_calls(response) + if tool_code: + logger.warning("Empty content with tool_calls detected; recovering tool call as Python code") + content = tool_code + reasoning = normalize_assistant_text( + (getattr(response, "additional_kwargs", None) or {}).get("reasoning_content") + ) + return content, reasoning + + def on_response_processed(self, state: Any, code: Optional[str], content: str) -> None: + try: + from cuga.backend.activity_tracker.tracker import Step + + self._tracker.collect_step(step=Step(name="Raw_Assistant_Response", data=content)) + if code: + self._tracker.collect_step(step=Step(name="Assistant_code", data=content)) + else: + self._tracker.collect_step(step=Step(name="Assistant_nl", data=content)) + except Exception as exc: + logger.debug("AgentGraphAdapter.on_response_processed tracker 
error: %s", exc) + + def build_metadata_update(self, state: Any, *, playbook_fired: bool) -> dict: + meta = _clean_empty_response_retry_meta(self.get_metadata(state)) + if playbook_fired: + return {**meta, "playbook_guidance_added": True} + return meta + + async def classify_auto_continue( + self, state: Any, model: Any, content: str, reasoning: Optional[str] + ) -> bool: + return await classify_nl_auto_continue(model, content, reasoning) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py index aa10010e..717b983e 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py @@ -52,10 +52,7 @@ """ import os -import re import json -import asyncio -import inspect from pathlib import Path from typing import Any, Optional, Sequence, Dict, List, Tuple, Set from loguru import logger @@ -68,13 +65,12 @@ from langchain_core.callbacks import BaseCallbackHandler from langchain_core.messages import BaseMessage, AIMessage, HumanMessage -from langgraph.graph import END, START, StateGraph +from langgraph.graph import StateGraph from langgraph.types import Command from cuga.backend.cuga_graph.nodes.task_decomposition_planning.analyze_task import TaskAnalyzer from cuga.backend.activity_tracker.tracker import ActivityTracker, Step from cuga.backend.llm.models import LLMManager -from cuga.backend.llm.errors import extract_code_from_tool_use_failed from cuga.backend.cuga_graph.state.agent_state import AgentState from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import ( create_mcp_prompt, @@ -87,21 +83,37 @@ CodeExecutor, is_find_tools_listing_markdown, ) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import ( + make_tool_awaitable, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.runtime_tools import ( + build_runtime_tools, + resolve_runtime_backends, +) +from 
cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( + CoreGraphAdapter, + append_chat_messages_with_step_limit as _core_append_with_step_limit, + create_error_command as _core_create_error_command, + execution_output_text, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes import ( + create_call_model_node as _create_shared_call_model_node, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_graph import build_agent_graph +from cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter import AgentGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ( + ExecutionRouter, + split_execution_note, +) from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ( - AppDefinition, ToolProviderInterface, ) from cuga.backend.cuga_graph.nodes.cuga_lite.model_runtime_profile import ( resolved_runtime_model_name, resolve_bind_tools_fields, ) -from cuga.backend.cuga_graph.nodes.cuga_lite.nl_auto_continue_classifier import ( - classify_nl_auto_continue, - normalize_assistant_text, -) from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler from cuga.backend.cuga_graph.policy.enactment import PolicyEnactment -from cuga.backend.cuga_graph.utils.context_management_utils import apply_context_summarization from cuga.config import settings from cuga.configurations.instructions_manager import get_all_instructions_formatted from cuga.backend.llm.utils.helpers import load_one_prompt @@ -127,8 +139,6 @@ tracker = ActivityTracker() llm_manager = LLMManager() -BACKTICK_PATTERN = r'```python(.*?)```' - def _tool_call_kwarg_literal(value: Any) -> str: """Python expression for values reconstructed from JSON tool-call arguments.""" @@ -551,53 +561,6 @@ def _decorate_knowledge_tool(tool: Any, allowed_scopes: tuple[str, ...], thread_ tool.description = f"{base_description}\n\n{hint}".strip() -def make_tool_awaitable(func): - """Wrap a sync function to make it 
awaitable (since agent always uses await). - - Also automatically converts Pydantic model return values to dicts using .model_dump(). - - If the function is already async, wrap it to handle Pydantic models. - If it's sync, wrap it to be awaitable using asyncio.run_in_executor and handle Pydantic models. - - Args: - func: The tool function (sync or async) - - Returns: - An awaitable function (coroutine function) that returns dicts for Pydantic models - """ - from pydantic import BaseModel - - async def wrapper_with_pydantic(*args, **kwargs): - """Inner wrapper that handles Pydantic model conversion.""" - result = await func(*args, **kwargs) if inspect.iscoroutinefunction(func) else func(*args, **kwargs) - - # Convert Pydantic models to dicts - if isinstance(result, BaseModel): - return result.model_dump() - - return result - - if inspect.iscoroutinefunction(func): - # Function is already async, just add Pydantic handling - return wrapper_with_pydantic - - # Function is sync, make it awaitable and add Pydantic handling - async def async_wrapper(*args, **kwargs): - # For sync functions, run in executor to make them awaitable - loop = asyncio.get_event_loop() - result = await loop.run_in_executor(None, lambda: func(*args, **kwargs)) - - # Convert Pydantic models to dicts - from pydantic import BaseModel - - if isinstance(result, BaseModel): - return result.model_dump() - - return result - - return async_wrapper - - class CugaLiteState(BaseModel): """State for CugaLite subgraph. 
@@ -716,23 +679,20 @@ def _reflection_current_task(state: CugaLiteState) -> str: return "" -def extract_and_combine_codeblocks(text: str) -> str: - """Extract all python codeblocks from text and combine them.""" - code_blocks = re.findall(BACKTICK_PATTERN, text, re.DOTALL) +class _CugaLiteLoopAdapter(CoreGraphAdapter): + """Lite seam: messages live on ``chat_messages``; step limit from + ``configurable`` override else ``settings.advanced_features``.""" - if code_blocks: - return "\n\n".join(block.strip() for block in code_blocks) + messages_key = "chat_messages" - stripped_text = text.strip() + def get_messages(self, state: CugaLiteState) -> List[BaseMessage]: + return state.chat_messages - if "print(" not in stripped_text: - return "" + def resolve_max_steps(self, state: CugaLiteState, override: Optional[int]) -> int: + return override if override is not None else settings.advanced_features.cuga_lite_max_steps - try: - compile(stripped_text.replace('await ', ''), '', 'exec') - return stripped_text - except SyntaxError: - return "" + +_LITE_LOOP_ADAPTER = _CugaLiteLoopAdapter() def append_chat_messages_with_step_limit( @@ -740,33 +700,8 @@ def append_chat_messages_with_step_limit( new_messages: List[BaseMessage], max_steps: Optional[int] = None, ) -> Tuple[List[BaseMessage], Optional[AIMessage]]: - """Append new messages to chat_messages with step counting and limit checking. - - Args: - state: Current CugaLiteState - new_messages: List of new messages to append - max_steps: Override from configurable; when None, use settings - - Returns: - Tuple of (updated_chat_messages, error_message) - - updated_chat_messages: Updated list of chat messages - - error_message: AIMessage with error if limit reached, None otherwise - """ - limit = max_steps if max_steps is not None else settings.advanced_features.cuga_lite_max_steps - new_step_count = state.step_count + 1 - - if new_step_count > limit: - error_msg = ( - f"Maximum step limit ({limit}) reached. 
" - f"The task has exceeded the allowed number of execution cycles. " - f"Please simplify your request or break it into smaller tasks." - ) - logger.warning(f"Step limit reached: {new_step_count} > {limit}") - error_ai_message = AIMessage(content=error_msg) - return state.chat_messages + new_messages + [error_ai_message], error_ai_message - - logger.debug(f"Step count: {new_step_count}/{limit}") - return state.chat_messages + new_messages, None + """Append messages to ``chat_messages`` with step counting + limit check.""" + return _core_append_with_step_limit(_LITE_LOOP_ADAPTER, state, new_messages, max_steps) def create_error_command( @@ -775,29 +710,10 @@ def create_error_command( step_count: int, additional_updates: Optional[Dict[str, Any]] = None, ) -> Command: - """Create a Command to END with error information. - - Args: - updated_messages: Updated chat messages - error_message: Error message to return - step_count: Current step count - additional_updates: Optional additional state updates - - Returns: - Command routing to END with error state - """ - updates = { - "chat_messages": updated_messages, - "script": None, - "final_answer": error_message.content, - "execution_complete": True, - "error": error_message.content, - "step_count": step_count + 1, - } - if additional_updates: - updates.update(additional_updates) - - return Command(goto=END, update=updates) + """Create a Command to END with error information.""" + return _core_create_error_command( + _LITE_LOOP_ADAPTER, updated_messages, error_message, step_count, additional_updates + ) class Todo(BaseModel): @@ -1170,7 +1086,9 @@ async def prepare_tools_and_apps( ) # Skip policy checking if policies are disabled or if we're returning from approval - if settings.policy.enabled and not ToolApprovalHandler.should_skip_policy_check(state): + if settings.policy.enabled and not ToolApprovalHandler.should_skip_policy_check( + _LITE_LOOP_ADAPTER, state + ): # Check for policies and enact if matched # Include 
IntentGuard, Playbook, and ToolGuide for intent checks from cuga.backend.cuga_graph.policy.models import PolicyType @@ -1179,6 +1097,7 @@ async def prepare_tools_and_apps( state, config, policy_types=[PolicyType.INTENT_GUARD, PolicyType.PLAYBOOK, PolicyType.TOOL_GUIDE], + adapter=_LITE_LOOP_ADAPTER, ) # If policy returned a command (e.g., BLOCK_INTENT), execute it immediately @@ -1187,7 +1106,7 @@ async def prepare_tools_and_apps( # If policy returned metadata (e.g., playbook guidance), store it if metadata: - state.cuga_lite_metadata = metadata + _LITE_LOOP_ADAPTER.set_metadata(state, metadata) elif not settings.policy.enabled: logger.debug("Policy system disabled - skipping policy checks") else: @@ -1423,90 +1342,31 @@ async def prepare_tools_and_apps( else: logger.warning(f"Skill tool '{tool.name}' has no callable, skipping") - # Inject the consolidated filesystem tools + run_command when shell - # tools are enabled. Filesystem tools come from the single runtime - # class (no MCP); the storage backend is selected by sandbox_mode: - # Filesystem tools are gated independently from run_command. - # enable_filesystem_tools → the 8 consolidated read/write/list/… tools. - # enable_shell_tool → run_command (sandbox shell execution). - _sandbox_mode = getattr(settings.advanced_features, "sandbox_mode", "opensandbox") - _shell_tool_on = getattr(settings.advanced_features, "enable_shell_tool", False) - _fs_tool_on = ( - configurable["enable_filesystem_tools"] - if "enable_filesystem_tools" in configurable - else getattr(settings.advanced_features, "enable_filesystem_tools", False) - ) - _opensandbox_on = getattr(settings.advanced_features, "opensandbox_sandbox", False) - _use_sandbox = _shell_tool_on and ( - (_sandbox_mode == "native") - or (_sandbox_mode == "opensandbox" and _opensandbox_on) - or (_sandbox_mode == "local") - ) + # Inject the consolidated filesystem tools + run_command via the + # shared runtime_tools orchestrator. 
Backend selection and gating + # live in cuga_agent_core (behavior-identical to the previous + # inline block); filesystem and run_command remain independently + # gated by enable_filesystem_tools / enable_shell_tool. + _runtime_backends = resolve_runtime_backends(settings, configurable) - if _fs_tool_on or _use_sandbox: + if _runtime_backends.filesystem != "none" or _runtime_backends.shell != "none": cfg = config.get("configurable", {}) if config else {} runtime_thread_id = cfg["thread_id"] if "thread_id" in cfg else (state.thread_id or thread_id) else: runtime_thread_id = None - # ── Filesystem tools (independent of run_command) ────────────────── - if _fs_tool_on: - from cuga.backend.cuga_graph.nodes.cuga_lite.executors.filesystem import ( - RemoteSandboxBackend, - create_filesystem_tools, - ) - - fs_backend = None - if _use_sandbox and _sandbox_mode == "opensandbox": - from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor - - fs_backend = RemoteSandboxBackend( - CodeExecutor._get_opensandbox_executor(), runtime_thread_id - ) - - fs_tools = create_filesystem_tools(runtime_thread_id, backend=fs_backend) - for ft in fs_tools: - fn = ft.coroutine or ft.func - if fn: - tools_context_dict[ft.name] = fn - tools_for_prompt.extend(fs_tools) - if apps_for_prompt is not None: - apps_for_prompt = list(apps_for_prompt) + [ - AppDefinition( - name="filesystem", - type="runtime", - description="Workspace filesystem tools: read, write, edit, list, search, move files and directories.", - ) - ] - logger.info( - f"Injected filesystem tools (thread_id={runtime_thread_id!r}): {[t.name for t in fs_tools]}" - ) - - # ── run_command (sandbox shell execution) ────────────────────────── - if _use_sandbox: - from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor - - if _sandbox_mode == "native": - sandbox_executor = CodeExecutor._get_native_executor() - sandbox_label = "NativeSandbox" - elif _sandbox_mode == "local": - sandbox_executor = 
CodeExecutor._get_local_sandbox_executor() - sandbox_label = "LocalSandbox" - else: - sandbox_executor = CodeExecutor._get_opensandbox_executor() - sandbox_label = "OpenSandbox" - - run_cmd_tools = sandbox_executor.create_sandbox_tools(thread_id=runtime_thread_id) - for st in run_cmd_tools: - fn = st.coroutine or st.func - if fn: - tools_context_dict[st.name] = fn - tools_for_prompt.extend(run_cmd_tools) - logger.info(f"[{sandbox_label}] Injected run_command (thread_id={runtime_thread_id!r})") + _runtime_bundle = build_runtime_tools(thread_id=runtime_thread_id, backends=_runtime_backends) + tools_context_dict.update(_runtime_bundle.execution_callables) + tools_for_prompt.extend(_runtime_bundle.prompt_tools) + if _runtime_bundle.app_definitions and apps_for_prompt is not None: + apps_for_prompt = list(apps_for_prompt) + _runtime_bundle.app_definitions from cuga.backend.evolve.memory import build_evolve_special_instructions_extension special_instructions_final = effective_special or "" + _split_note = split_execution_note(ExecutionRouter.resolve(settings)) + if _split_note: + special_instructions_final = (special_instructions_final + "\n\n" + _split_note).strip() evolve_extension = await build_evolve_special_instructions_extension( state=state, configurable=configurable, @@ -1773,407 +1633,6 @@ async def _wrapped(*args, **kwargs): return prepare_tools_and_apps - # Factory function to create call_model node with access to model - def create_call_model_node( - base_model, - base_callbacks, - task_todos_ref: List[Dict[str, str]], - model_settings=None, - tools_context_ref=None, - base_tool_provider=None, - ): - """Factory to create call_model node. Model is taken from config['configurable']['llm'] - when set (injected at invocation), otherwise uses base_model from graph build. 
- """ - - async def call_model(state: CugaLiteState, config: Optional[RunnableConfig] = None) -> Command: - """Call the LLM to generate code or text response.""" - configurable = config.get("configurable", {}) if config else {} - max_steps = ( - configurable.get("cuga_lite_max_steps") if "cuga_lite_max_steps" in configurable else None - ) - - logger.debug( - f"[APPROVAL DEBUG] call_model received cuga_lite_metadata: {state.cuga_lite_metadata}" - ) - - # Check if we're returning from tool approval - if so, skip code generation and go to sandbox - # Only check if policies are enabled - if settings.policy.enabled and ToolApprovalHandler.is_returning_from_approval(state): - return ToolApprovalHandler.handle_approval_resumption(state) - - # Get prompt from state (tools are available via sandbox context, not needed here) - dynamic_prompt = state.prepared_prompt or "" - - _cfg = config.get("configurable", {}) if config else {} - _enable_todos = ( - _cfg.get("enable_todos") - if "enable_todos" in _cfg - else settings.advanced_features.enable_todos - ) - - system_content = dynamic_prompt - if _enable_todos: - if task_todos_ref: - system_content = dynamic_prompt + format_task_todos_system_block(task_todos_ref) - elif state.task_todos: - system_content = dynamic_prompt + format_current_plan_section(state.task_todos) - - # Convert BaseMessage objects to dict format for model invocation - messages_for_model = [{"role": "system", "content": system_content}] - few_shot_messages = state.mcp_few_shot_messages or [] - for example in few_shot_messages: - role = (example.get("role") or "").strip().lower() - content = example.get("content") or "" - if role in {"user", "assistant"} and content: - messages_for_model.append({"role": role, "content": content}) - if few_shot_messages: - logger.info( - "Injected {} MCP few-shot turn(s) as chat messages before live conversation", - len(few_shot_messages), - ) - - # Check if we have variables and this is a new question (not a follow-up with 
existing AI responses) - # If this is a new question (1 user msg, 0 AI msgs) or follow-up, add variables to the last user message - var_manager = state.variables_manager - for _vn in list(var_manager.get_variable_names()): - if is_find_tools_listing_markdown(var_manager.get_variable(_vn)): - var_manager.remove_variable(_vn) - existing_variable_names = var_manager.get_variable_names() - variables_summary_text = None - - if existing_variable_names and state.sub_task_app: - variables_summary_text = var_manager.get_variables_summary( - variable_names=existing_variable_names - ) - variables_addendum = f"\n\n## Available Variables\n\n{variables_summary_text}\n\nYou can use these variables directly by their names." - logger.info( - f"Will add variables summary for {len(existing_variable_names)} variables to user message" - ) - - logger.info(f"Processing {len(state.chat_messages)} chat messages for model invocation") - - # Track if we've added personal information (pi) - pi_added = False - - # Get playbook guidance if available (only on first detection) - # TODO: In the future, we could refine the playbook guidance on each message - # based on conversation progress and completed steps - playbook_guidance = None - playbook_already_added = False - - # Check if playbook guidance was already added in previous messages - if state.cuga_lite_metadata and state.cuga_lite_metadata.get('playbook_guidance_added'): - playbook_already_added = True - - if ( - state.cuga_lite_metadata - and state.cuga_lite_metadata.get('policy_matched') - and not playbook_already_added - ): - if state.cuga_lite_metadata.get('policy_type') == 'playbook': - playbook_guidance = state.cuga_lite_metadata.get('playbook_guidance') - if playbook_guidance: - logger.info( - "Will inject playbook guidance into current user message (first time only)" - ) - - # Get configurable values from config - configurable = config.get("configurable", {}) if config else {} - current_callbacks = configurable.get("callbacks", 
base_callbacks or []) - active_model = configurable.get("llm") or base_model - _runtime_model_name = resolved_runtime_model_name( - configurable_llm=configurable.get("llm"), - graph_default_model=base_model, - ) - - # ── Context management BEFORE building messages_for_model ──────────── - effective_chat_messages = await apply_context_summarization( - state.chat_messages or [], - active_model, - system_prompt=dynamic_prompt, - tools=None, - tracker=tracker, - variables_storage=state.variables_storage, - variable_counter_state=state.variable_counter_state, - variable_creation_order=state.variable_creation_order, - ) - # effective_chat_messages may contain summarized messages if context limit exceeded - # ───────────────────────────────────────────────────────────────────── - - # Build messages_for_model from effective_chat_messages (post-summarization) - # Also build modified_chat_messages with playbook/pi/variables injected - modified_chat_messages = [] - for i, msg in enumerate(effective_chat_messages): - msg_type = type(msg).__name__ - msg_role = getattr(msg, 'type', None) - - if isinstance(msg, HumanMessage): - content = msg.content - content_modified = False - - # Add personal information (pi) to the FIRST user message only - if ( - state.pi - and not pi_added - and "## User Context" not in content - and len(effective_chat_messages) == 1 - ): - content = f"{content}\n\n## User Context\n{state.pi}" - pi_added = True - content_modified = True - logger.debug("Added personal information (pi) to first user message") - - # Add playbook guidance to the LAST user message only - if playbook_guidance and i == len(effective_chat_messages) - 1: - content = f"{content}\n\n## Task Guidance\n{playbook_guidance}" - content_modified = True - logger.debug("Added playbook guidance to last user message") - - # Add variables summary to the LAST user message only - if variables_summary_text and i == len(effective_chat_messages) - 1: - content = content + variables_addendum - 
content_modified = True - logger.debug("Added variables summary to last user message") - - # Build new message if modified, otherwise keep original - if content_modified: - modified_chat_messages.append(HumanMessage(content=content)) - logger.debug(f"Created modified message at index {i} with playbook/pi/variables") - else: - modified_chat_messages.append(msg) - - messages_for_model.append({"role": "user", "content": content}) - elif isinstance(msg, AIMessage): - modified_chat_messages.append(msg) - messages_for_model.append({"role": "assistant", "content": msg.content}) - else: - # Handle generic BaseMessage by checking the 'type' attribute - if msg_role == 'human' or msg_role == 'user': - content = msg.content - content_modified = False - - # Add personal information (pi) to the FIRST user message only - if state.pi and not pi_added: - content = f"{content}\n\n## User Context\n{state.pi}" - pi_added = True - content_modified = True - logger.debug("Added personal information (pi) to first user message") - - # Add playbook guidance to the LAST user message only - if playbook_guidance and i == len(effective_chat_messages) - 1: - content = f"{content}\n\n## Task Guidance\n{playbook_guidance}" - content_modified = True - logger.debug("Added playbook guidance to last user message") - - if variables_summary_text and i == len(effective_chat_messages) - 1: - content = content + variables_addendum - content_modified = True - - # Build new message if modified, otherwise keep original - if content_modified: - modified_chat_messages.append(HumanMessage(content=content)) - logger.debug(f"Created modified message at index {i} with playbook/pi/variables") - else: - modified_chat_messages.append(msg) - - messages_for_model.append({"role": "user", "content": content}) - logger.debug(f"Added BaseMessage as user message (role={msg_role})") - elif msg_role == 'ai' or msg_role == 'assistant': - modified_chat_messages.append(msg) - messages_for_model.append({"role": "assistant", 
"content": msg.content}) - logger.debug(f"Added BaseMessage as assistant message (role={msg_role})") - else: - modified_chat_messages.append(msg) - logger.warning( - f"Skipping message {i} with unknown type: {msg_type}, role: {msg_role}" - ) - - try: - invoke_model = await resolve_model_with_bind_tools( - active_model, - configurable=configurable, - tools_context_ref=tools_context_ref, - tool_provider=base_tool_provider, - model_name=_runtime_model_name, - ) - - response = await invoke_model.ainvoke( - messages_for_model, config={"callbacks": current_callbacks} - ) - logger.debug(f"Response: {response}") - except Exception as e: - code = extract_code_from_tool_use_failed(e) - if code: - logger.warning( - "Model attempted tool call without tools bound (tool_use_failed). " - "Using generated code in sandbox" - ) - response = type( - "_FakeResponse", (), {"content": f"```python\n{code}\n```", "additional_kwargs": {}} - )() - else: - raise e - - _resp_tool_calls = getattr(response, "tool_calls", None) or [] - _resp_ak_keys = list((getattr(response, "additional_kwargs", None) or {}).keys()) - _resp_finish = (getattr(response, "response_metadata", None) or {}).get( - "finish_reason", "unknown" - ) - logger.debug( - f"LLM response — type: {type(response).__name__} | " - f"content_len: {len(response.content or '')} | " - f"finish_reason: {_resp_finish} | " - f"tool_calls: {_resp_tool_calls} | " - f"additional_kwargs_keys: {_resp_ak_keys}" - ) - - raw_content = normalize_assistant_text(response.content) - if not raw_content: - tool_code = _extract_code_from_response_tool_calls(response) - if tool_code: - logger.warning( - "Empty content with tool_calls detected (proxy conversion); " - "recovering tool call as Python code" - ) - raw_content = tool_code - elif _resp_finish not in ("stop", "unknown"): - logger.warning( - f"LLM returned empty content with finish_reason='{_resp_finish}'; " - "likely a safety filter or terminal stop." 
- ) - - content = raw_content - - reasoning_str = normalize_assistant_text(response.additional_kwargs.get('reasoning_content')) - - tracker.collect_step(step=Step(name="Raw_Assistant_Response", data=content)) - - # Try to extract code from content first, then reasoning if content has no code - code = extract_and_combine_codeblocks(content) if content else "" - - if not code and reasoning_str: - code = extract_and_combine_codeblocks(reasoning_str) - - if code: - tracker.collect_step(step=Step(name="Assistant_code", data=content)) - logger.debug( - f"\n{'=' * 50} ASSISTANT CODE {'=' * 50}\n{code}\n{'=' * 50} END ASSISTANT CODE {'=' * 50}" - ) - - # Check if code requires approval and create interrupt if needed - # Only check if policies are enabled - if settings.policy.enabled: - approval_command = await ToolApprovalHandler.check_and_create_approval_interrupt( - state, code, content, config - ) - if approval_command: - return approval_command - - # Build updated messages from modified_chat_messages + new AI response - updated_messages = modified_chat_messages + [AIMessage(content=content)] - new_step_count = state.step_count + 1 - - # Check step limit - limit = max_steps if max_steps is not None else settings.advanced_features.cuga_lite_max_steps - if new_step_count > limit: - error_msg = ( - f"Maximum step limit ({limit}) reached. " - f"The task has exceeded the allowed number of execution cycles. " - f"Please simplify your request or break it into smaller tasks." 
- ) - logger.warning(f"Step limit reached: {new_step_count} > {limit}") - error_ai_message = AIMessage(content=error_msg) - return create_error_command( - updated_messages + [error_ai_message], error_ai_message, state.step_count - ) - - logger.debug(f"Step count: {new_step_count}/{limit}") - - # Update metadata to mark playbook guidance as added - updated_metadata = _clean_empty_response_retry_meta(state.cuga_lite_metadata or {}) - if playbook_guidance: - updated_metadata = {**updated_metadata, "playbook_guidance_added": True} - - return Command( - goto="sandbox", - update={ - "chat_messages": updated_messages, - "script": code, - "step_count": new_step_count, - "cuga_lite_metadata": updated_metadata, - }, - ) - else: - tracker.collect_step(step=Step(name="Assistant_nl", data=content)) - planning_response = content or "" - - # Build updated messages from modified_chat_messages + new AI response - updated_messages = modified_chat_messages + [AIMessage(content=planning_response)] - new_step_count = state.step_count + 1 - - # Check step limit - limit = max_steps if max_steps is not None else settings.advanced_features.cuga_lite_max_steps - if new_step_count > limit: - error_msg = ( - f"Maximum step limit ({limit}) reached. " - f"The task has exceeded the allowed number of execution cycles. " - f"Please simplify your request or break it into smaller tasks." 
- ) - logger.warning(f"Step limit reached: {new_step_count} > {limit}") - error_ai_message = AIMessage(content=error_msg) - return create_error_command( - updated_messages + [error_ai_message], error_ai_message, state.step_count - ) - - logger.debug(f"Step count: {new_step_count}/{limit}") - - # Update metadata to mark playbook guidance as added - updated_metadata = _clean_empty_response_retry_meta(state.cuga_lite_metadata or {}) - if playbook_guidance: - updated_metadata = {**updated_metadata, "playbook_guidance_added": True} - - should_auto_continue = await classify_nl_auto_continue( - active_model, - planning_response, - reasoning_str or None, - ) - tracker.collect_step( - step=Step( - name="NL_Auto_Continue_Classifier", - data=json.dumps({"auto_continue": should_auto_continue}), - ) - ) - if should_auto_continue: - logger.info( - "CugaLite: NL-only response classified as interim; simulating user 'continue'" - ) - return Command( - goto="call_model", - update={ - "chat_messages": updated_messages + [HumanMessage(content="continue")], - "script": None, - "final_answer": "", - "execution_complete": False, - "step_count": new_step_count, - "cuga_lite_metadata": updated_metadata, - }, - ) - - return Command( - goto=END, - update={ - "chat_messages": updated_messages, - "script": None, - "final_answer": planning_response, - "execution_complete": True, - "step_count": new_step_count, - "cuga_lite_metadata": updated_metadata, - }, - ) - - return call_model - # Factory function to create sandbox node with access to tools context def create_sandbox_node(base_tools_context, base_thread_id, base_apps_list): """Factory to create sandbox node with closure over tools context and config.""" @@ -2184,7 +1643,7 @@ async def sandbox(state: CugaLiteState, config: Optional[RunnableConfig] = None) # Check if user denied approval (only if policies are enabled) if settings.policy.enabled: - denial_command = ToolApprovalHandler.handle_denial(state) + denial_command = 
ToolApprovalHandler.handle_denial(_LITE_LOOP_ADAPTER, state) if denial_command: return denial_command @@ -2221,12 +1680,21 @@ async def sandbox(state: CugaLiteState, config: Optional[RunnableConfig] = None) try: # Execute the script - pass the CugaLiteState itself since it has variables_manager + _exec_plan = ExecutionRouter.resolve(settings) + if _exec_plan.split_execution_active: + logger.info( + "Split execution: python=%s shell=%s fs=%s", + _exec_plan.python_backend, + _exec_plan.shell_backend, + _exec_plan.filesystem_backend, + ) output, new_vars = await CodeExecutor.eval_with_tools_async( code=state.script, _locals=context, state=state, # Pass CugaLiteState - it has variables_manager property thread_id=current_thread_id, apps_list=current_apps_list, + plan=_exec_plan, ) tracker.collect_step(step=Step(name="User_output", data=output)) @@ -2295,7 +1763,7 @@ async def sandbox(state: CugaLiteState, config: Optional[RunnableConfig] = None) reflection_output = "" # Output is already formatted by code_executor - execution_message_content = f"Execution output:\n{output}" + execution_message_content = execution_output_text(output) if reflection_output: execution_message_content = ( f"{execution_message_content}\n\n---\n\nSummary:\n{reflection_output}" @@ -2386,23 +1854,22 @@ async def sandbox(state: CugaLiteState, config: Optional[RunnableConfig] = None) task_todos_ref, lc_bind_tools_meta=lc_bind_tools_meta, ) - call_model_node = create_call_model_node( - model, - callbacks, - task_todos_ref, - model_settings=model_settings, + sandbox_node = create_sandbox_node(tools_context, thread_id, apps_list) + + # Shared call_model node via AgentGraphAdapter + adapter = AgentGraphAdapter( + tracker=tracker, + base_callbacks=callbacks or [], + task_todos_ref=task_todos_ref, tools_context_ref=lc_bind_tools_meta, base_tool_provider=tool_provider, ) - sandbox_node = create_sandbox_node(tools_context, thread_id, apps_list) - - # Build the graph - graph = StateGraph(CugaLiteState) - 
graph.add_node("prepare_tools_and_apps", prepare_node) - graph.add_node("call_model", call_model_node) - graph.add_node("sandbox", sandbox_node) - - graph.add_edge(START, "prepare_tools_and_apps") - graph.add_edge("sandbox", "call_model") - - return graph + call_model_node = _create_shared_call_model_node(adapter, model, settings) + + return build_agent_graph( + adapter=adapter, + state_class=CugaLiteState, + prepare_node=prepare_node, + call_model_node=call_model_node, + execute_node=sandbox_node, + ) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/code_executor.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/code_executor.py index fa369755..8bb68500 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/code_executor.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/code_executor.py @@ -13,6 +13,7 @@ from .opensandbox import OpenSandboxExecutor from .native import NativeSandboxExecutor from .base_executor import BaseExecutor, RemoteExecutor +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ExecutionPlan def _skills_enabled() -> bool: @@ -107,6 +108,8 @@ async def eval_with_tools_async( thread_id: Optional[str] = None, apps_list: Optional[List[str]] = None, mode: Optional[Literal['local', 'e2b', 'opensandbox']] = None, + plan: Optional[ExecutionPlan] = None, + variable_manager: Optional[Any] = None, ) -> tuple[str, dict[str, Any]]: """Execute code with async tools available in the local namespace. @@ -116,7 +119,12 @@ async def eval_with_tools_async( state: AgentState instance with variables_manager thread_id: Thread ID for sandbox caching (optional) apps_list: List of app names for parsing tool names correctly (optional) - mode: Execution mode ('local', 'e2b', or 'opensandbox'). If None, uses settings. + mode: Execution mode ('local', 'e2b', or 'opensandbox'). If None, uses + ``plan.python_backend`` when a plan is given, else settings. 
+ plan: Resolved ExecutionPlan; its ``python_backend`` selects the + Python execution path unless ``mode`` is given explicitly. + variable_manager: Variable manager to record new variables into. + Defaults to ``state.variables_manager`` (preserves prior behavior). Returns: Tuple of (execution result, new variables dictionary) @@ -125,7 +133,10 @@ async def eval_with_tools_async( result = "" if mode is None: - mode = 'e2b' if settings.advanced_features.e2b_sandbox else 'local' + if plan is not None: + mode = 'e2b' if plan.python_backend == 'e2b' else 'local' + else: + mode = 'e2b' if settings.advanced_features.e2b_sandbox else 'local' # Force local execution for short find_tools or load_skill calls code_lines = [line.strip() for line in code.split('\n') if line.strip()] @@ -216,7 +227,7 @@ async def eval_with_tools_async( # Add variables summary to the formatted output result = VariableUtils.add_variables_to_manager( new_vars, - state.variables_manager, + variable_manager if variable_manager is not None else state.variables_manager, result, skip_summary_keys={'todos'}, ) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_execution_plan_wiring.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_execution_plan_wiring.py new file mode 100644 index 00000000..ffe62594 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_execution_plan_wiring.py @@ -0,0 +1,132 @@ +"""Phase 2 wiring: CodeExecutor accepts an explicit ExecutionPlan and an +explicit variable_manager, without changing behavior for existing callers. + +- explicit ``variable_manager`` is used instead of ``state.variables_manager`` + (foundation for the supervisor variable-coupling fix, phase 9); +- ``plan.python_backend`` drives the e2b-vs-local choice when ``mode`` is + not explicitly given; +- an explicit ``mode`` argument still wins over the plan. 
+""" + +from __future__ import annotations + +from unittest.mock import Mock + +import pytest + +from cuga.backend.cuga_graph.state.agent_state import AgentState, VariablesManager +from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ExecutionPlan + + +def _plan(python_backend: str) -> ExecutionPlan: + return ExecutionPlan( + requested_backend=python_backend, + python_backend=python_backend, + shell_backend="none", + filesystem_backend="none", + ) + + +@pytest.fixture +def mock_state(): + state = Mock(spec=AgentState) + state.variables_manager = VariablesManager() + return state + + +@pytest.mark.asyncio +async def test_explicit_variable_manager_is_used_instead_of_state(mock_state): + explicit_vm = VariablesManager() + + await CodeExecutor.eval_with_tools_async( + code="result = 41 + 1\nprint(result)", + _locals={}, + state=mock_state, + mode="local", + variable_manager=explicit_vm, + ) + + assert "result" in explicit_vm.get_variable_names() + assert "result" not in mock_state.variables_manager.get_variable_names() + + +@pytest.mark.asyncio +async def test_omitting_variable_manager_falls_back_to_state(mock_state): + await CodeExecutor.eval_with_tools_async( + code="result = 7\nprint(result)", + _locals={}, + state=mock_state, + mode="local", + ) + + assert "result" in mock_state.variables_manager.get_variable_names() + + +@pytest.mark.asyncio +async def test_plan_python_backend_e2b_routes_to_e2b_executor(mock_state, monkeypatch): + called = {} + + class FakeE2B: + async def execute_for_cuga_lite(self, *, wrapped_code, context_locals, state, thread_id, apps_list): + called["e2b"] = True + return "ok", {} + + monkeypatch.setattr(CodeExecutor, "_get_e2b_executor", classmethod(lambda cls: FakeE2B())) + + await CodeExecutor.eval_with_tools_async( + code="print('hi')", + _locals={}, + state=mock_state, + plan=_plan("e2b"), + ) + + assert called.get("e2b") is True + + 
+@pytest.mark.asyncio +async def test_supervisor_adapter_variable_manager_routes_to_supervisor_vm(mock_state): + """Phase-9 regression guard: passing supervisor_variables_manager as variable_manager + routes new variables to that manager, not to state.variables_manager. + + The structural fix (passing the adapter's get_variable_manager result to + eval_with_tools_async inside execute_agent_tool) is validated here via the + mechanism that the wiring enables — the closure itself cannot be isolated + without full graph construction. + """ + sup_vm = VariablesManager() + mock_state.supervisor_variables_manager = sup_vm + + await CodeExecutor.eval_with_tools_async( + code="phase9_var = 99\nprint(phase9_var)", + _locals={}, + state=mock_state, + mode="local", + variable_manager=sup_vm, + ) + + assert "phase9_var" in sup_vm.get_variable_names() + assert "phase9_var" not in mock_state.variables_manager.get_variable_names() + + +@pytest.mark.asyncio +async def test_explicit_mode_overrides_plan(mock_state, monkeypatch): + called = {} + + class FakeE2B: + async def execute_for_cuga_lite(self, **kwargs): + called["e2b"] = True + return "ok", {} + + monkeypatch.setattr(CodeExecutor, "_get_e2b_executor", classmethod(lambda cls: FakeE2B())) + + result, new_vars = await CodeExecutor.eval_with_tools_async( + code="result = 5\nprint(result)", + _locals={}, + state=mock_state, + mode="local", + plan=_plan("e2b"), + ) + + assert called.get("e2b") is None + assert new_vars.get("result") == 5 diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_extract_codeblocks.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_extract_codeblocks.py index f8d43c37..db89e287 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_extract_codeblocks.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_extract_codeblocks.py @@ -1,4 +1,4 @@ -from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import 
extract_and_combine_codeblocks +from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import extract_and_combine_codeblocks class TestExtractAndCombineCodeblocks: diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_sync_async_tools.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_sync_async_tools.py index f974ff94..d85b8248 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_sync_async_tools.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_sync_async_tools.py @@ -11,7 +11,7 @@ from cuga.backend.cuga_graph.state.agent_state import AgentState, VariablesManager from cuga.backend.cuga_graph.nodes.cuga_lite.executors.code_executor import CodeExecutor -from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import make_tool_awaitable +from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import make_tool_awaitable @pytest.fixture diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/memory/test_cuga_lite_graph_evolve_integration.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/memory/test_cuga_lite_graph_evolve_integration.py index 47996d9d..f06047a5 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/memory/test_cuga_lite_graph_evolve_integration.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/memory/test_cuga_lite_graph_evolve_integration.py @@ -29,11 +29,11 @@ def _make_stub_tool_provider(): def _make_prepare_node(): - """Return the prepare_tools_and_apps runnable extracted from a minimal graph.""" + """Return the prepare node runnable extracted from a minimal graph.""" model = MagicMock() model.bind_tools = MagicMock(return_value=model) graph = create_cuga_lite_graph(model=model, tool_provider=_make_stub_tool_provider(), apps_list=[]) - return graph.nodes["prepare_tools_and_apps"].runnable + return graph.nodes["prepare"].runnable class TestEvolveStoreAndRetrieveCalledDuringPrepare: diff --git 
a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_agent_graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_agent_graph_adapter.py new file mode 100644 index 00000000..ac4eecb2 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_agent_graph_adapter.py @@ -0,0 +1,294 @@ +"""Phase 4 — AgentGraphAdapter hook correctness tests. + +Pins the hook overrides that AgentGraphAdapter contributes to the shared +call_model node: + +1. Class-level attributes (messages_key, execute_node_name, etc.) +2. State-reading hooks: get_messages, get_few_shot_messages, get_pi, + get_variables_storage, get_variable_manager +3. Lifecycle hooks: get_tracker, get_invoke_config, normalize_response, + on_response_processed, build_metadata_update, classify_auto_continue +4. System content augmentation: prepare_system_content with/without todos + +These tests do NOT test the full prepare_tools_and_apps / sandbox logic — +those are covered by the existing Lite integration tests. +""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from langchain_core.messages import HumanMessage + + +def _get_adapter_class(): + from cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter import AgentGraphAdapter + + return AgentGraphAdapter + + +def _make_tracker(): + tracker = MagicMock() + tracker.collect_step = MagicMock() + return tracker + + +def _make_adapter(*, task_todos_ref=None, tools_context_ref=None, base_tool_provider=None): + AgentGraphAdapter = _get_adapter_class() + return AgentGraphAdapter( + tracker=_make_tracker(), + base_callbacks=[], + task_todos_ref=task_todos_ref or [], + tools_context_ref=tools_context_ref or {}, + base_tool_provider=base_tool_provider, + ) + + +# ── 1. 
Class-level attributes ────────────────────────────────────────────── + + +def test_messages_key_is_chat_messages(): + AgentGraphAdapter = _get_adapter_class() + assert AgentGraphAdapter.messages_key == "chat_messages" + + +def test_execute_node_name_is_sandbox(): + AgentGraphAdapter = _get_adapter_class() + assert AgentGraphAdapter.execute_node_name == "sandbox" + + +def test_metadata_key_is_cuga_lite_metadata(): + AgentGraphAdapter = _get_adapter_class() + assert AgentGraphAdapter.metadata_key == "cuga_lite_metadata" + + +def test_sender_name_is_cuga_lite(): + AgentGraphAdapter = _get_adapter_class() + assert AgentGraphAdapter.sender_name == "CugaLite" + + +# ── 2. State-reading hooks ───────────────────────────────────────────────── + + +def test_get_messages_returns_chat_messages(): + adapter = _make_adapter() + msg = HumanMessage(content="hi") + state = SimpleNamespace(chat_messages=[msg]) + assert adapter.get_messages(state) == [msg] + + +def test_get_messages_returns_empty_list_when_none(): + adapter = _make_adapter() + state = SimpleNamespace(chat_messages=None) + assert adapter.get_messages(state) == [] + + +def test_get_few_shot_messages_returns_mcp_few_shot(): + adapter = _make_adapter() + examples = [{"role": "user", "content": "example"}] + state = SimpleNamespace(mcp_few_shot_messages=examples) + assert adapter.get_few_shot_messages(state) == examples + + +def test_get_few_shot_messages_returns_empty_when_none(): + adapter = _make_adapter() + state = SimpleNamespace(mcp_few_shot_messages=None) + assert adapter.get_few_shot_messages(state) == [] + + +def test_get_pi_returns_state_pi(): + adapter = _make_adapter() + state = SimpleNamespace(pi="You are helpful.") + assert adapter.get_pi(state) == "You are helpful." 
+ + +def test_get_pi_returns_none_when_missing(): + adapter = _make_adapter() + state = SimpleNamespace(pi=None) + assert adapter.get_pi(state) is None + + +def test_get_variables_storage_returns_state_variables_storage(): + adapter = _make_adapter() + storage = {"x": {"value": 42}} + state = SimpleNamespace(variables_storage=storage) + assert adapter.get_variables_storage(state) is storage + + +# ── 3. Tracker hook ─────────────────────────────────────────────────────── + + +def test_get_tracker_returns_injected_tracker(): + AgentGraphAdapter = _get_adapter_class() + tracker = _make_tracker() + adapter = AgentGraphAdapter( + tracker=tracker, + base_callbacks=[], + task_todos_ref=[], + tools_context_ref={}, + base_tool_provider=None, + ) + assert adapter.get_tracker() is tracker + + +# ── 4. invoke_config hook ───────────────────────────────────────────────── + + +def test_get_invoke_config_returns_callbacks_from_configurable(): + adapter = _make_adapter() + cb = object() + config = {"callbacks": [cb]} + result = adapter.get_invoke_config(config) + assert result == {"callbacks": [cb]} + + +def test_get_invoke_config_falls_back_to_base_callbacks(): + AgentGraphAdapter = _get_adapter_class() + base_cb = object() + adapter = AgentGraphAdapter( + tracker=_make_tracker(), + base_callbacks=[base_cb], + task_todos_ref=[], + tools_context_ref={}, + base_tool_provider=None, + ) + result = adapter.get_invoke_config({}) + assert result == {"callbacks": [base_cb]} + + +# ── 5. 
normalize_response hook ──────────────────────────────────────────── + + +def test_normalize_response_strips_empty_content(): + adapter = _make_adapter() + response = SimpleNamespace(content=" hello ", additional_kwargs={}) + content, reasoning = adapter.normalize_response(response) + # normalize_assistant_text strips whitespace + assert content.strip() == "hello" + + +def test_normalize_response_extracts_reasoning(): + adapter = _make_adapter() + response = SimpleNamespace( + content="hi", + additional_kwargs={"reasoning_content": "I thought about it"}, + ) + _, reasoning = adapter.normalize_response(response) + assert reasoning == "I thought about it" + + +# ── 6. on_response_processed hook ──────────────────────────────────────── + + +def test_on_response_processed_calls_tracker_for_code(): + AgentGraphAdapter = _get_adapter_class() + tracker = _make_tracker() + adapter = AgentGraphAdapter( + tracker=tracker, + base_callbacks=[], + task_todos_ref=[], + tools_context_ref={}, + base_tool_provider=None, + ) + state = SimpleNamespace() + adapter.on_response_processed(state, code="print(1)", content="```python\nprint(1)\n```") + tracker.collect_step.assert_called() + + +def test_on_response_processed_calls_tracker_for_nl(): + AgentGraphAdapter = _get_adapter_class() + tracker = _make_tracker() + adapter = AgentGraphAdapter( + tracker=tracker, + base_callbacks=[], + task_todos_ref=[], + tools_context_ref={}, + base_tool_provider=None, + ) + state = SimpleNamespace() + adapter.on_response_processed(state, code=None, content="The answer is 42.") + tracker.collect_step.assert_called() + + +# ── 7. 
build_metadata_update hook ──────────────────────────────────────── + + +def test_build_metadata_update_cleans_empty_response_meta(): + adapter = _make_adapter() + state = SimpleNamespace(cuga_lite_metadata={"_empty_response_correction": True, "other_key": 1}) + result = adapter.build_metadata_update(state, playbook_fired=False) + assert "_empty_response_correction" not in result + assert result["other_key"] == 1 + + +def test_build_metadata_update_adds_playbook_flag_when_fired(): + adapter = _make_adapter() + state = SimpleNamespace(cuga_lite_metadata={"some_key": True}) + result = adapter.build_metadata_update(state, playbook_fired=True) + assert result["playbook_guidance_added"] is True + + +# ── 8. classify_auto_continue hook ─────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_classify_auto_continue_delegates_to_nl_classifier(): + adapter = _make_adapter() + state = SimpleNamespace() + mock_model = MagicMock() + + with patch( + "cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter.classify_nl_auto_continue", + new_callable=AsyncMock, + return_value=True, + ) as mock_classify: + result = await adapter.classify_auto_continue(state, mock_model, "Let me continue.", "thought") + mock_classify.assert_called_once_with(mock_model, "Let me continue.", "thought") + assert result is True + + +@pytest.mark.asyncio +async def test_classify_auto_continue_returns_false_when_not_continuing(): + adapter = _make_adapter() + state = SimpleNamespace() + + with patch( + "cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter.classify_nl_auto_continue", + new_callable=AsyncMock, + return_value=False, + ): + result = await adapter.classify_auto_continue(state, None, "All done.", None) + assert result is False + + +# ── 9. 
prepare_system_content hook ─────────────────────────────────────── + + +def test_prepare_system_content_no_todos_returns_base_prompt(): + adapter = _make_adapter(task_todos_ref=[]) + state = SimpleNamespace(task_todos=None) + result = adapter.prepare_system_content(state, {}, "You are an agent.") + assert result == "You are an agent." + + +def test_prepare_system_content_appends_todos_ref_when_present(): + todos = [{"title": "Step 1", "status": "pending"}] + adapter = _make_adapter(task_todos_ref=todos) + state = SimpleNamespace(task_todos=None) + result = adapter.prepare_system_content(state, {}, "You are an agent.") + assert result != "You are an agent." + assert len(result) > len("You are an agent.") + + +def test_resolve_max_steps_uses_override_when_given(): + adapter = _make_adapter() + state = SimpleNamespace(cuga_lite_max_steps=None) + assert adapter.resolve_max_steps(state, 10) == 10 + + +def test_resolve_max_steps_uses_state_when_set(): + adapter = _make_adapter() + state = SimpleNamespace(cuga_lite_max_steps=25) + assert adapter.resolve_max_steps(state, None) == 25 diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_cuga_lite_graph_evolve_guidelines.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_cuga_lite_graph_evolve_guidelines.py index 6115553f..3047c1e8 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_cuga_lite_graph_evolve_guidelines.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_cuga_lite_graph_evolve_guidelines.py @@ -56,7 +56,7 @@ async def test_cuga_lite_evolve_guidelines_are_injected_independently_of_legacy_ False, ), patch( - "cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph.apply_context_summarization", + "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", new=AsyncMock(side_effect=lambda messages, *args, **kwargs: messages), ), patch( diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py 
b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py index c07c64d9..64bc1c32 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py @@ -4,7 +4,7 @@ Handles the detection, interruption, and resumption of tool approval flows. """ -from typing import TYPE_CHECKING, List, Optional +from typing import Any, List, Optional from loguru import logger from langchain_core.messages import AIMessage @@ -12,61 +12,43 @@ from langgraph.types import Command from langgraph.graph import END - -if TYPE_CHECKING: - from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import CugaLiteState +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( + CoreGraphAdapter, + append_chat_messages_with_step_limit as _core_append_with_step_limit, + create_error_command as _core_create_error_command, +) class ToolApprovalHandler: """Handles tool approval detection, interruption, and resumption logic.""" @staticmethod - def should_skip_policy_check(state: "CugaLiteState") -> bool: + def should_skip_policy_check(adapter: CoreGraphAdapter, state: Any) -> bool: """ Check if policy checking should be skipped. Returns True if we're returning from approval (user_approved=True), which preserves the approval state and prevents re-matching the same policy. - - Args: - state: Current CugaLiteState - - Returns: - True if policy check should be skipped, False otherwise """ - return bool(state.cuga_lite_metadata and state.cuga_lite_metadata.get("user_approved")) + md = adapter.get_metadata(state) + return bool(md and md.get("user_approved")) @staticmethod - def is_returning_from_approval(state: "CugaLiteState") -> bool: - """ - Check if we're returning from tool approval. 
- - Args: - state: Current CugaLiteState - - Returns: - True if returning from approval, False otherwise - """ - return bool(state.cuga_lite_metadata and state.cuga_lite_metadata.get("user_approved") is True) + def is_returning_from_approval(adapter: CoreGraphAdapter, state: Any) -> bool: + """Check if we're returning from tool approval.""" + md = adapter.get_metadata(state) + return bool(md and md.get("user_approved") is True) @staticmethod - def extract_approved_code(state: "CugaLiteState") -> Optional[str]: - """ - Extract the approved code from the last AI message. - - Args: - state: Current CugaLiteState with chat_messages - - Returns: - Extracted code string, or None if not found - """ - from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import ( + def extract_approved_code(adapter: CoreGraphAdapter, state: Any) -> Optional[str]: + """Extract the approved code from the last AI message.""" + from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import ( extract_and_combine_codeblocks, ) # Find the last AI message last_ai_message = None - for msg in reversed(state.chat_messages): + for msg in reversed(adapter.get_messages(state)): if msg.type == "ai": last_ai_message = msg break @@ -107,70 +89,44 @@ def clean_approval_metadata(metadata: dict) -> dict: return {k: v for k, v in metadata.items() if k not in fields_to_remove} @staticmethod - def handle_approval_resumption(state: "CugaLiteState") -> Optional[Command]: - """ - Handle resumption after user approval. - - Extracts the approved code and routes to sandbox for execution. 
- - Args: - state: Current CugaLiteState - - Returns: - Command to route to sandbox, or error Command if code extraction fails - """ - from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import ( - create_error_command, - ) - + def handle_approval_resumption(adapter: CoreGraphAdapter, state: Any) -> Optional[Command]: + """Handle resumption after user approval: run the approved code.""" logger.info("Returning from tool approval - skipping code generation, executing approved code") # Extract code from last AI message - code = ToolApprovalHandler.extract_approved_code(state) + code = ToolApprovalHandler.extract_approved_code(adapter, state) if not code: logger.error("Could not extract code from last AI message after approval") - return create_error_command( - state.chat_messages, + return _core_create_error_command( + adapter, + adapter.get_messages(state), AIMessage(content="Failed to retrieve approved code for execution"), state.step_count, ) # Clean approval metadata - cleaned_metadata = ToolApprovalHandler.clean_approval_metadata(state.cuga_lite_metadata) + cleaned_metadata = ToolApprovalHandler.clean_approval_metadata(adapter.get_metadata(state)) - # Route to sandbox with approved code + # Route to the graph's execute node with approved code return Command( - goto="sandbox", + goto=adapter.execute_node_name, update={ "script": code, - "cuga_lite_metadata": cleaned_metadata, + adapter.metadata_key: cleaned_metadata, "step_count": state.step_count + 1, }, ) @staticmethod async def check_and_create_approval_interrupt( - state: "CugaLiteState", + adapter: CoreGraphAdapter, + state: Any, code: str, content: str, config: dict = None, ) -> Optional[Command]: - """ - Check if code requires approval and create interrupt if needed. - - This method checks ToolApproval policies directly against the generated code, - independent of the initial policy matching phase. 
- - Args: - state: Current CugaLiteState - code: Generated code to check - content: Full AI response content - config: Optional config containing policy system - - Returns: - Command to interrupt for approval, or None if no approval needed - """ + """Check if code requires approval and create an interrupt if needed.""" from cuga.backend.cuga_graph.policy.configurable import PolicyConfigurable try: @@ -200,7 +156,7 @@ async def check_and_create_approval_interrupt( # Store policy metadata for the approval flow approval_metadata = { - **state.cuga_lite_metadata, + **adapter.get_metadata(state), "policy_type": "tool_approval", "policy_id": policy.id, "policy_name": policy.name, @@ -212,10 +168,12 @@ async def check_and_create_approval_interrupt( } # Update state metadata temporarily for the interrupt creation - state.cuga_lite_metadata = approval_metadata + adapter.set_metadata(state, approval_metadata) # Create the approval interrupt - return ToolApprovalHandler._create_approval_interrupt(state, code, content, preview_lines) + return ToolApprovalHandler._create_approval_interrupt( + adapter, state, code, content, preview_lines + ) except Exception as e: logger.error(f"Error checking tool approval policies: {e}", exc_info=True) @@ -223,44 +181,30 @@ async def check_and_create_approval_interrupt( @staticmethod def _create_approval_interrupt( - state: "CugaLiteState", + adapter: CoreGraphAdapter, + state: Any, code: str, content: str, preview_lines: List[str], ) -> Command: - """ - Create an interrupt Command for tool approval. 
- - Args: - state: Current CugaLiteState - code: Generated code - content: Full AI response content - preview_lines: Code preview lines to show user - - Returns: - Command to exit subgraph and route to HITL - """ - from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import ( - append_chat_messages_with_step_limit, - create_error_command, - ) + """Create an interrupt Command for tool approval.""" from cuga.backend.cuga_graph.nodes.human_in_the_loop.followup_model import create_tool_approval_action + md = adapter.get_metadata(state) + # Create approval request metadata approval_metadata = { - **state.cuga_lite_metadata, + **md, "approval_required": True, "code_preview": preview_lines, - "full_code": code if state.cuga_lite_metadata.get("show_code_preview") else None, + "full_code": code if md.get("show_code_preview") else None, } # Extract policy details - policy_name = state.cuga_lite_metadata.get("policy_name", "Tool Approval") - approval_msg = state.cuga_lite_metadata.get( - "approval_message", "This tool requires your approval before execution." 
- ) - tools_list = state.cuga_lite_metadata.get("required_tools", []) - apps_list = state.cuga_lite_metadata.get("required_apps", []) + policy_name = md.get("policy_name", "Tool Approval") + approval_msg = md.get("approval_message", "This tool requires your approval before execution.") + tools_list = md.get("required_tools", []) + apps_list = md.get("required_apps", []) # Create HITL action for tool approval hitl_action = create_tool_approval_action( @@ -281,22 +225,22 @@ def _create_approval_interrupt( ) # Update messages - updated_messages, error_message = append_chat_messages_with_step_limit( - state, [AIMessage(content=content)] + updated_messages, error_message = _core_append_with_step_limit( + adapter, state, [AIMessage(content=content)] ) if error_message: - return create_error_command(updated_messages, error_message, state.step_count) + return _core_create_error_command(adapter, updated_messages, error_message, state.step_count) # Return command to exit subgraph and route to parent's SuggestHumanActions -> WaitForResponse return Command( - goto=END, # Exit subgraph to parent CugaLiteNode.callback_node + goto=END, # Exit subgraph to parent callback node update={ - "chat_messages": updated_messages, + adapter.messages_key: updated_messages, "script": code, "final_answer": final_answer_text, - "cuga_lite_metadata": approval_metadata, + adapter.metadata_key: approval_metadata, "hitl_action": hitl_action, # Set HITL action for parent to detect - "sender": "CugaLite", # Mark sender for return routing + "sender": adapter.sender_name, # Mark sender for return routing "step_count": state.step_count + 1, }, ) @@ -349,17 +293,9 @@ def _generate_approval_message( return "\n".join(content_lines) @staticmethod - def handle_denial(state: "CugaLiteState") -> Optional[Command]: - """ - Handle user denial of tool approval. 
- - Args: - state: Current CugaLiteState - - Returns: - Command to end execution, or None if not denied - """ - if state.cuga_lite_metadata.get("user_approved") is False: + def handle_denial(adapter: CoreGraphAdapter, state: Any) -> Optional[Command]: + """Handle user denial of tool approval.""" + if adapter.get_metadata(state).get("user_approved") is False: logger.warning("User denied tool approval - skipping execution") return Command( goto=END, diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py index 4af99aa4..3dee6062 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py @@ -7,32 +7,33 @@ Uses conversational mode: Supervisor acts as a single agent with delegation tools (similar to cuga_lite). """ -import re import inspect -import asyncio from typing import Any, Dict, List, Optional, Union, Tuple -from loguru import logger -from pathlib import Path from langchain_core.language_models import BaseChatModel -from langchain_core.runnables import RunnableConfig -from langchain_core.messages import BaseMessage, AIMessage, HumanMessage +from langchain_core.messages import BaseMessage, AIMessage -from langgraph.graph import END, START, StateGraph +from langgraph.graph import StateGraph from langgraph.types import Command from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_state import ( CugaSupervisorState, - AgentInfo, ) -from cuga.backend.cuga_graph.utils.context_management_utils import apply_context_summarization from cuga.sdk import CugaAgent from cuga.config import settings -from cuga.configurations.instructions_manager import get_all_instructions_formatted -from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor - -# Pattern for extracting Python code blocks -BACKTICK_PATTERN = r'```python(.*?)```' +from 
cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( + CoreGraphAdapter, + append_chat_messages_with_step_limit as _core_append_with_step_limit, + create_error_command as _core_create_error_command, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes import ( + create_call_model_node as _create_shared_call_model_node, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_graph import build_agent_graph +from cuga.backend.cuga_graph.nodes.cuga_supervisor.supervisor_graph_adapter import ( + SupervisorGraphAdapter, +) +from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface def _resolve_names_from_caller_frame(variable_names: List[str]) -> Dict[str, Any]: @@ -58,70 +59,43 @@ def _resolve_names_from_caller_frame(variable_names: List[str]) -> Dict[str, Any return resolved -def extract_and_combine_codeblocks(text: str) -> str: - """Extract all codeblocks from a text string and combine them.""" - code_blocks = re.findall(BACKTICK_PATTERN, text, re.DOTALL) - - if code_blocks: - processed_blocks = [] - for block in code_blocks: - block = block.strip() - processed_blocks.append(block) - - combined_code = "\n\n".join(processed_blocks) +class _CugaSupervisorLoopAdapter(CoreGraphAdapter): + """Supervisor seam: messages live on ``supervisor_chat_messages`` + (None-safe); step limit from ``state.cuga_lite_max_steps`` else + ``settings.advanced_features`` (default 50).""" - if "print(" not in combined_code: - return "" + messages_key = "supervisor_chat_messages" + # Approval seams (override the Lite defaults on CoreGraphAdapter). 
+ metadata_key = "supervisor_metadata" + execute_node_name = "execute_agent_tool" + sender_name = "CugaSupervisor" - return combined_code - - stripped_text = text.strip() - - if "print(" not in stripped_text: - return "" - - try: - compile(stripped_text.replace('await ', ''), '', 'exec') - return stripped_text - except SyntaxError: - return "" + def get_messages(self, state: CugaSupervisorState) -> List[BaseMessage]: + return state.supervisor_chat_messages or [] + def resolve_max_steps(self, state: CugaSupervisorState, override: Optional[int]) -> int: + if override is not None: + return override + return ( + state.cuga_lite_max_steps + if state.cuga_lite_max_steps is not None + else getattr(settings.advanced_features, 'cuga_lite_max_steps', 50) + ) -def make_tool_awaitable(func): - """Wrap a sync function to make it awaitable (since agent always uses await).""" - if inspect.iscoroutinefunction(func): - return func + def get_variable_manager(self, state: CugaSupervisorState): + # Supervisor stores execution vars on its own manager, not the + # root state.variables_manager (the phase-9 coupling fix builds here). + return state.supervisor_variables_manager - async def async_wrapper(*args, **kwargs): - loop = asyncio.get_event_loop() - return await loop.run_in_executor(None, lambda: func(*args, **kwargs)) - return async_wrapper +_SUPERVISOR_LOOP_ADAPTER = _CugaSupervisorLoopAdapter() def append_chat_messages_with_step_limit( state: CugaSupervisorState, new_messages: List[BaseMessage] ) -> Tuple[List[BaseMessage], Optional[AIMessage]]: - """Append new messages to supervisor_chat_messages with step counting and limit checking.""" - max_steps = ( - state.cuga_lite_max_steps - if state.cuga_lite_max_steps is not None - else getattr(settings.advanced_features, 'cuga_lite_max_steps', 50) - ) - new_step_count = state.step_count + 1 - - if new_step_count > max_steps: - error_msg = ( - f"Maximum step limit ({max_steps}) reached. 
" - f"The task has exceeded the allowed number of execution cycles. " - f"Please simplify your request or break it into smaller tasks." - ) - logger.warning(f"Step limit reached: {new_step_count} > {max_steps}") - error_ai_message = AIMessage(content=error_msg) - return (state.supervisor_chat_messages or []) + new_messages + [error_ai_message], error_ai_message - - logger.debug(f"Step count: {new_step_count}/{max_steps}") - return (state.supervisor_chat_messages or []) + new_messages, None + """Append messages to ``supervisor_chat_messages`` with step limit check.""" + return _core_append_with_step_limit(_SUPERVISOR_LOOP_ADAPTER, state, new_messages) def create_error_command( @@ -131,24 +105,16 @@ def create_error_command( additional_updates: Optional[Dict[str, Any]] = None, ) -> Command: """Create a Command to END with error information.""" - updates = { - "supervisor_chat_messages": updated_messages, - "script": None, - "final_answer": error_message.content, - "execution_complete": True, - "error": error_message.content, - "step_count": step_count + 1, - } - if additional_updates: - updates.update(additional_updates) - - return Command(goto=END, update=updates) + return _core_create_error_command( + _SUPERVISOR_LOOP_ADAPTER, updated_messages, error_message, step_count, additional_updates + ) def create_cuga_supervisor_graph( supervisor_model: BaseChatModel, agents: Dict[str, Union[CugaAgent, Dict[str, Any]]], special_instructions: Optional[str] = None, + tool_provider: Optional[ToolProviderInterface] = None, ) -> StateGraph: """ Create supervisor subgraph that orchestrates multiple CugaAgent instances. 
def _create_supervisor_conversational_graph(
    supervisor_model: BaseChatModel,
    agents: Dict[str, Union[CugaAgent, Dict[str, Any]]],
    special_instructions: Optional[str] = None,
    tool_provider: Optional[ToolProviderInterface] = None,
) -> StateGraph:
    """Assemble the supervisor conversational-mode graph.

    A ``SupervisorGraphAdapter`` is built from ``agents``,
    ``special_instructions`` and ``tool_provider``. The adapter supplies the
    prepare and execute nodes, the shared call-model factory supplies the
    model node, and ``build_agent_graph`` wires them together over
    ``CugaSupervisorState``.

    Args:
        supervisor_model: Language model handed to the shared call-model node.
        agents: Mapping of agent name to a ``CugaAgent`` or a config dict.
        special_instructions: Optional instructions forwarded to the adapter.
        tool_provider: Optional tool provider forwarded to the adapter.

    Returns:
        Whatever ``build_agent_graph`` returns for these nodes.
    """
    adapter = SupervisorGraphAdapter(
        agents=agents,
        special_instructions=special_instructions,
        tool_provider=tool_provider,
    )
    # Dict displays evaluate in source order, so the nodes are still built
    # prepare -> execute -> call_model.
    nodes = {
        "prepare_node": adapter.build_prepare_node(),
        "execute_node": adapter.build_execute_node(),
        "call_model_node": _create_shared_call_model_node(adapter, supervisor_model, settings),
    }
    return build_agent_graph(
        adapter=adapter,
        state_class=CugaSupervisorState,
        **nodes,
    )
agent_card is set for external A2A (http) when using a2a-sdk.""" - - async def delegate_to_agent(task: str, variables: Optional[List[str]] = None) -> Any: - logger.info(f"Delegating to {agent_name}: {task[:100]}...") - - if isinstance(agent_or_config, CugaAgent): - vars_to_pass = {} - if variables is not None: - vars_to_pass = _resolve_names_from_caller_frame(variables) - result = await agent_or_config.invoke( - task, - thread_id=f"supervisor_conversational_{agent_name}", - variables=vars_to_pass if vars_to_pass else None, - ) - return result.answer if hasattr(result, "answer") else str(result) - - if isinstance(agent_or_config, dict) and agent_or_config.get("type") == "external": - a2a_config = agent_or_config.get("config", {}).get("a2a_protocol", {}) - endpoint = a2a_config.get("endpoint") - transport = a2a_config.get("transport", "http") - - if agent_card is not None and HAS_A2A_SDK and transport == "http": - vars_to_pass = {} - if pass_variables_a2a and variables is not None: - vars_to_pass = _resolve_names_from_caller_frame(variables) - result = await delegate_task_via_a2a_sdk( - agent_card, - task, - auth=a2a_config.get("auth"), - timeout=float(a2a_config.get("timeout", 30)), - variables=vars_to_pass if vars_to_pass else None, - ) - return result.get("result", "") - else: - a2a_protocol = A2AProtocol(endpoint=endpoint, transport=transport) - await a2a_protocol.connect() - try: - vars_to_pass = {} - if variables is not None: - vars_to_pass = _resolve_names_from_caller_frame(variables) - result = await a2a_protocol.delegate_task( - target_agent=agent_name, - task=task, - context={"thread_id": None}, - variables=vars_to_pass, - ) - return result.get("result", "") - finally: - await a2a_protocol.disconnect() - - return f"Error: Unknown agent type for {agent_name}" - - return delegate_to_agent - - # Factory function to create prepare_agents_and_prompt node - def create_prepare_agents_and_prompt_node(base_agents, base_prompt_template_str, base_instructions): - 
"""Factory to create prepare node with closure over agents and prompt template.""" - - async def prepare_agents_and_prompt( - state: CugaSupervisorState, config: Optional[RunnableConfig] = None - ) -> Command: - """Prepare agents, create delegation tools, and generate prompt.""" - logger.info("Preparing agents and prompt for supervisor conversational mode") - - # Build agent info for prompt - agent_list = [] - agent_tools_for_prompt = [] - - for agent_name, agent_or_config in base_agents.items(): - agent_card = None - if isinstance(agent_or_config, CugaAgent): - agent_type = "internal" - description = getattr(agent_or_config, 'description', f"Internal agent: {agent_name}") - elif isinstance(agent_or_config, dict): - agent_type = agent_or_config.get("type", "external") - a2a_cfg = agent_or_config.get("config", {}).get("a2a_protocol", {}) - if agent_type == "external" and HAS_A2A_SDK and a2a_cfg.get("transport") == "http": - endpoint = a2a_cfg.get("endpoint") - if endpoint: - try: - agent_card = await fetch_agent_card( - endpoint, - auth=a2a_cfg.get("auth"), - timeout=float(a2a_cfg.get("timeout", 30)), - ) - description = _agent_card_description(agent_card) - except Exception as e: - logger.warning(f"Failed to fetch A2A agent card for {agent_name}: {e}") - description = agent_or_config.get( - "description", f"External agent: {agent_name}" - ) - else: - description = agent_or_config.get("description", f"External agent: {agent_name}") - else: - description = agent_or_config.get("description", f"{agent_type} agent: {agent_name}") - else: - agent_type = "unknown" - description = f"Agent: {agent_name}" - - agent_entry = { - "name": agent_name, - "type": agent_type, - "description": description, - } - if agent_card is not None: - agent_entry["agent_card"] = format_agent_card_for_prompt(agent_card) - agent_list.append(agent_entry) - - tool_name = f"delegate_to_{agent_name}" - tool_func = create_agent_delegation_func(agent_name, agent_or_config, agent_card=agent_card) - 
agent_tools_context[tool_name] = tool_func - - is_a2a_agent = agent_card is not None - if is_a2a_agent and pass_variables_a2a: - tool_info = { - "name": tool_name, - "description": f"Delegate a task to the {agent_name} agent. {description} Variables are passed in request metadata.", - "params_str": "task: str, variables: Optional[List[str]] = None", - "params_doc": f"- task (str): The task description to send to {agent_name}\n- variables (Optional[List[str]]): Variable names to pass in A2A metadata (e.g. ['customer_id', 'order_data'])", - "response_doc": f"Returns the result from {agent_name}.", - } - elif is_a2a_agent: - tool_info = { - "name": tool_name, - "description": f"Delegate a task to {agent_name}. {description}", - "params_str": "task: str", - "params_doc": f"- task (str): The task description to send to {agent_name}.", - "response_doc": f"Returns the result from {agent_name}.", - } - else: - tool_info = { - "name": tool_name, - "description": f"Delegate a task to the {agent_name} agent. 
This agent specializes in: {description}", - "params_str": "task: str, variables: Optional[List[str]] = None", - "params_doc": f"- task (str): The task description to delegate to {agent_name}\n- variables (Optional[List[str]]): List of variable names to pass to the agent (e.g., ['customer_id', 'order_data'])", - "response_doc": f"Returns the result from {agent_name} agent execution.", - } - agent_tools_for_prompt.append(tool_info) - - # Always enable todos tool for supervisor conversational mode - from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import create_update_todos_tool - - todos_tool = await create_update_todos_tool() - agent_tools_context['create_update_todos'] = make_tool_awaitable(todos_tool.func) - agent_tools_for_prompt.append( - { - "name": "create_update_todos", - "description": todos_tool.description, - "params_str": "todos: List[Dict[str, str]]", - "params_doc": "todos: List of todo items, each with 'text' and 'status' ('pending' or 'completed')", - "response_doc": "Returns the current list of todos with their status.", - } - ) - - # Create prompt using template (similar to create_mcp_prompt) - is_autonomous_subtask = state.sub_task is not None and state.sub_task.strip() != "" - - # Use Jinja2 template rendering - from jinja2 import Template - - template = Template(base_prompt_template_str) - dynamic_prompt = template.render( - base_prompt=None, - agents=agent_list, - tools=agent_tools_for_prompt, - is_autonomous_subtask=is_autonomous_subtask, - instructions=base_instructions, - enable_todos=True, # Always enable todos for supervisor conversational mode - special_instructions=special_instructions, - ) - - return Command( - goto="call_model", - update={ - "tools_prepared": True, - "prepared_prompt": dynamic_prompt, - "step_count": 0, - "available_agents": { - name: AgentInfo( - name=name, type=info["type"], description=info["description"] - ).model_dump() - for name, info in zip([a["name"] for a in agent_list], agent_list) - }, - }, - ) 
- - return prepare_agents_and_prompt - - # Factory function to create call_model node - def create_call_model_node(base_model): - """Factory to create call_model node with closure over model.""" - - async def call_model(state: CugaSupervisorState, config: Optional[RunnableConfig] = None) -> Command: - """Call the LLM to generate code or text response.""" - # ============================================================================ - # CONTEXT SUMMARIZATION - Manage context before LLM invocation - # ============================================================================ - effective_chat_messages = await apply_context_summarization( - state.supervisor_chat_messages or [], - base_model, - system_prompt=state.prepared_prompt, - tools=None, # Supervisor doesn't use traditional tools - tracker=None, # Supervisor doesn't have a tracker - variables_storage=state.supervisor_variables, - variable_counter_state=state.variable_counter_state, - variable_creation_order=state.variable_creation_order, - message_list_name="supervisor_chat_messages", # Use supervisor message list - ) - # ============================================================================ - # END CONTEXT SUMMARIZATION BLOCK - # ============================================================================ - - logger.info("Supervisor conversational: calling model") - - # Get prompt from state - dynamic_prompt = state.prepared_prompt - - # Convert supervisor_chat_messages to messages for model - messages_for_model = [{"role": "system", "content": dynamic_prompt}] - - # Add chat history from supervisor_chat_messages - # Also add variables summary if available - var_manager = state.supervisor_variables_manager - existing_variable_names = var_manager.get_variable_names() - variables_summary_text = None - - if existing_variable_names: - variables_summary_text = var_manager.get_variables_summary( - variable_names=existing_variable_names - ) - variables_addendum = f"\n\n## Available 
Variables\n\n{variables_summary_text}\n\nYou can use these variables directly by their names." - - logger.info( - f"Processing {len(effective_chat_messages)} supervisor_chat_messages for model invocation" - ) - - # Create a copy of the messages list to avoid mutating the original until we return - modified_chat_messages = list(effective_chat_messages) - - for i, msg in enumerate(modified_chat_messages): - msg_type = type(msg).__name__ - msg_role = getattr(msg, 'type', None) - logger.debug( - f"Message {i}: type={msg_type}, role={msg_role}, isinstance(HumanMessage)={isinstance(msg, HumanMessage)}, isinstance(AIMessage)={isinstance(msg, AIMessage)}" - ) - - if isinstance(msg, HumanMessage): - content = msg.content - content_modified = False - - # Add variables summary to the LAST user message only - if variables_summary_text and i == len(modified_chat_messages) - 1: - content = content + variables_addendum - content_modified = True - logger.debug("Added variables summary to last user message") - - # Update the local copy if content was modified - if content_modified: - modified_chat_messages[i] = HumanMessage(content=content) - logger.debug(f"Updated modified_chat_messages[{i}] with modified content (variables)") - - messages_for_model.append({"role": "user", "content": content}) - elif isinstance(msg, AIMessage): - messages_for_model.append({"role": "assistant", "content": msg.content}) - else: - # Handle generic BaseMessage by checking the 'type' attribute - if ( - msg_role == 'human' - or msg_role == 'user' - or (isinstance(msg, dict) and msg.get("type") == "human") - ): - content = msg.content if hasattr(msg, 'content') else msg.get("content", "") - content_modified = False - - # Add variables summary to the LAST user message only - if variables_summary_text and i == len(modified_chat_messages) - 1: - content = content + variables_addendum - content_modified = True - logger.debug("Added variables summary to last user message") - - # Update the local copy if 
content was modified - if content_modified: - modified_chat_messages[i] = HumanMessage(content=content) - logger.debug( - f"Updated modified_chat_messages[{i}] with modified content (variables)" - ) - - messages_for_model.append({"role": "user", "content": content}) - logger.debug(f"Added BaseMessage as user message (role={msg_role})") - elif ( - msg_role == 'ai' - or msg_role == 'assistant' - or (isinstance(msg, dict) and msg.get("type") == "ai") - ): - content = msg.content if hasattr(msg, 'content') else msg.get("content", "") - messages_for_model.append({"role": "assistant", "content": content}) - logger.debug(f"Added BaseMessage as assistant message (role={msg_role})") - else: - logger.warning( - f"Skipping message {i} with unknown type: {msg_type}, role: {msg_role}" - ) - - logger.debug(f"Total messages for model (including system): {len(messages_for_model)}") - - response = await base_model.ainvoke(messages_for_model, config=config or {}) - content = response.content - reasoning_content = response.additional_kwargs.get('reasoning_content') - - # Extract code - code = extract_and_combine_codeblocks(content) if content else "" - if not code and reasoning_content: - code = extract_and_combine_codeblocks(reasoning_content) - - if code: - logger.info(f"Supervisor conversational: extracted code block ({len(code)} chars)") - # Append AI response to our local modified_chat_messages - final_messages = modified_chat_messages + [AIMessage(content=content)] - - # Check step limit - max_steps = getattr(settings.advanced_features, 'cuga_lite_max_steps', 50) - new_step_count = state.step_count + 1 - - if new_step_count > max_steps: - error_msg = ( - f"Maximum step limit ({max_steps}) reached. " - f"The task has exceeded the allowed number of execution cycles. " - f"Please simplify your request or break it into smaller tasks." 
- ) - logger.warning(f"Step limit reached: {new_step_count} > {max_steps}") - error_ai_message = AIMessage(content=error_msg) - final_messages = final_messages + [error_ai_message] - return create_error_command(final_messages, error_ai_message, state.step_count) - - return Command( - goto="execute_agent_tool", - update={ - "supervisor_chat_messages": final_messages, - "script": code, - "step_count": new_step_count, - }, - ) - else: - # No code - final text answer - logger.info("Supervisor conversational: final text answer (no code)") - # Append AI response to our local modified_chat_messages - final_messages = modified_chat_messages + [AIMessage(content=content)] - - # Check step limit - max_steps = getattr(settings.advanced_features, 'cuga_lite_max_steps', 50) - new_step_count = state.step_count + 1 - - if new_step_count > max_steps: - error_msg = ( - f"Maximum step limit ({max_steps}) reached. " - f"The task has exceeded the allowed number of execution cycles. " - f"Please simplify your request or break it into smaller tasks." 
- ) - logger.warning(f"Step limit reached: {new_step_count} > {max_steps}") - error_ai_message = AIMessage(content=error_msg) - final_messages = final_messages + [error_ai_message] - return create_error_command(final_messages, error_ai_message, state.step_count) - - return Command( - goto=END, - update={ - "supervisor_chat_messages": final_messages, - "script": None, - "final_answer": content, - "execution_complete": True, - "step_count": new_step_count, - }, - ) - - return call_model - - # Factory function to create execute_agent_tool node - def create_execute_agent_tool_node(base_agent_tools_context): - """Factory to create execute_agent_tool node with closure over agent tools.""" - - async def execute_agent_tool(state: CugaSupervisorState, config: Optional[RunnableConfig] = None): - """Execute code with agent delegation tools available.""" - logger.info("Supervisor conversational: executing agent delegation code") - - # Get existing variables - existing_vars = {} - var_manager = state.supervisor_variables_manager - for var_name in var_manager.get_variable_names(): - existing_vars[var_name] = var_manager.get_variable(var_name) - - # Add agent tools to context - context = {**existing_vars, **base_agent_tools_context} - - try: - # Execute code using CodeExecutor (reuse from cuga_lite) - output, new_vars = await CodeExecutor.eval_with_tools_async( - code=state.script, - _locals=context, - state=state, # Pass state for variables_manager - thread_id=state.thread_id, - apps_list=None, # Not needed for agent delegation - ) - - logger.debug(f"Execution output: {output.strip()[:500]}...") - - # Update variables - for name, value in new_vars.items(): - state.supervisor_variables_manager.add_variable( - value, name=name, description="Created during agent delegation execution" - ) - - # Create execution message - execution_message_content = f"Execution output:\n{output}" - new_message = HumanMessage(content=execution_message_content) - updated_messages, error_message = 
append_chat_messages_with_step_limit(state, [new_message]) - - if error_message: - return create_error_command( - updated_messages, - error_message, - state.step_count, - additional_updates={ - "supervisor_variables": state.supervisor_variables, - }, - ) - - return { - "supervisor_chat_messages": updated_messages, - "supervisor_variables": state.supervisor_variables, - "step_count": state.step_count + 1, - } - except Exception as e: - error_msg = f"Error during execution: {str(e)}" - logger.error(error_msg, exc_info=True) - new_message = HumanMessage(content=error_msg) - updated_messages, limit_error_message = append_chat_messages_with_step_limit( - state, [new_message] - ) - - if limit_error_message: - return create_error_command(updated_messages, limit_error_message, state.step_count) - - return { - "supervisor_chat_messages": updated_messages, - "error": error_msg, - "execution_complete": True, - "step_count": state.step_count + 1, - } - - return execute_agent_tool - - # Create node instances - prepare_node = create_prepare_agents_and_prompt_node(agents, prompt_template_str, instructions) - call_model_node = create_call_model_node(supervisor_model) - execute_agent_tool_node = create_execute_agent_tool_node(agent_tools_context) - - # Build the graph - graph = StateGraph(CugaSupervisorState) - graph.add_node("prepare_agents_and_prompt", prepare_node) - graph.add_node("call_model", call_model_node) - graph.add_node("execute_agent_tool", execute_agent_tool_node) - - graph.add_edge(START, "prepare_agents_and_prompt") - graph.add_edge("prepare_agents_and_prompt", "call_model") - graph.add_edge("execute_agent_tool", "call_model") # Loop back to call_model after execution - - return graph diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_node.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_node.py index 20d31976..4d5fcda6 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_node.py +++ 
b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_node.py @@ -213,6 +213,45 @@ async def callback_node(self, state: AgentState, config: Optional[RunnableConfig f" - state.supervisor_chat_messages: {len(state.supervisor_chat_messages) if state.supervisor_chat_messages else 0} messages" ) + # Tool-approval HITL resume (Slice B). Kept separate from the + # AGENT_APPROVAL block below because the approval interrupt sets + # final_answer, which that block's `not final_answer` guard would + # skip. Additive: only fires for TOOL_APPROVAL, which never occurs + # for Supervisor unless settings.policy.enabled. + if ( + state.sender == "WaitForResponse" + and state.hitl_response + and state.hitl_response.action_id == ActionIds.TOOL_APPROVAL + ): + if state.hitl_response.confirmed: + logger.info("User approved supervisor tool execution - resuming subgraph") + sd = state.model_dump() + if not sd.get("supervisor_chat_messages"): + sd["supervisor_chat_messages"] = [] + sd["supervisor_metadata"] = { + **(sd.get("supervisor_metadata") or {}), + "approval_required": False, + "user_approved": True, + } + sd["hitl_action"] = None + sd["hitl_response"] = None + sd["final_answer"] = "" # clear approval message so the subgraph runs + return Command( + update=CugaSupervisorState(**sd).model_dump(), + goto="CugaSupervisorSubgraph", + ) + else: + logger.warning("User denied supervisor tool execution - stopping") + policy_name = (state.supervisor_metadata or {}).get("policy_name", "Tool Approval") + state.final_answer = ( + f"❌ **Execution Cancelled**\n\nYou denied execution required by " + f"**{policy_name}**. The supervisor will not proceed with this task." 
+ ) + state.execution_complete = True + state.hitl_response = None + state.sender = self.name + return Command(update=state.model_dump(), goto="FinalAnswerAgent") + # Handle human-in-the-loop responses (when coming back from WaitForResponse) # Only process if we don't already have a final_answer (to prevent loops) if state.sender == "WaitForResponse" and state.hitl_response and not state.final_answer: @@ -257,9 +296,15 @@ async def callback_node(self, state: AgentState, config: Optional[RunnableConfig state.sender = self.name return Command(update=state.model_dump(), goto="FinalAnswerAgent") - # Check if we need to route to HITL for agent approval (first time, after subgraph) - if state.hitl_action and state.hitl_action.action_id == ActionIds.AGENT_APPROVAL: - logger.info("Agent approval required - routing to SuggestHumanActions") + # Route to HITL for agent OR tool approval (first time, after subgraph). + # TOOL_APPROVAL is additive (Slice B) — it never fires for Supervisor + # unless settings.policy.enabled and a ToolApproval policy matched; + # AGENT_APPROVAL behavior is unchanged. + if state.hitl_action and state.hitl_action.action_id in ( + ActionIds.AGENT_APPROVAL, + ActionIds.TOOL_APPROVAL, + ): + logger.info("Approval required - routing to SuggestHumanActions") # Set sender so WaitForResponse knows where to return to state.sender = self.name logger.info(f"Set sender to: {state.sender}") diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py new file mode 100644 index 00000000..52b1495d --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py @@ -0,0 +1,542 @@ +"""SupervisorGraphAdapter — CoreGraphAdapter implementation for CugaSupervisor. 
def _resolve_names_from_caller_frame(variable_names: List[str]) -> Dict[str, Any]:
    """Look up ``variable_names`` in the calling frame's namespaces.

    Each name is searched in the caller's locals first and then in the
    caller's globals. Globals are consulted because LocalExecutor injects
    supervisor context into ``_async_main``'s globals; a locals-only lookup
    missed those bindings, so sub-agents received no variables and tasks
    showed e.g. ``amount=None``.

    Args:
        variable_names: Names to resolve, in the order they should appear in
            the result.

    Returns:
        A dict of the names that were found, mapped to their values. Names
        found in neither namespace are omitted. The dict is empty when no
        caller frame is available.
    """
    # NOTE(review): only the immediate caller (``f_back``) is inspected. The
    # call sites in this module sit inside ``delegate_to_agent``, so the frame
    # examined is that coroutine's -- confirm this is the frame where the
    # delegated script's variables are expected to be visible.
    current = inspect.currentframe()
    try:
        calling_frame = None if current is None else current.f_back
        if calling_frame is None:
            return {}
        local_ns = calling_frame.f_locals
        global_ns = calling_frame.f_globals
        found: Dict[str, Any] = {}
        for name in variable_names:
            if name in local_ns:
                found[name] = local_ns[name]
            elif name in global_ns:
                found[name] = global_ns[name]
        return found
    finally:
        # Drop the frame reference explicitly to avoid keeping a
        # frame <-> local-variable reference cycle alive.
        del current
def get_variable_manager(self, state: Any) -> Any:
    """Return ``state.supervisor_variables_manager``, or ``None`` if absent.

    Args:
        state: Any object; it need not define the attribute.

    Returns:
        The value of the ``supervisor_variables_manager`` attribute, or
        ``None`` when looking it up raises ``AttributeError``.
    """
    try:
        return state.supervisor_variables_manager
    except AttributeError:
        return None
isinstance(agent_or_config, CugaAgent): + vars_to_pass = {} + if variables is not None: + vars_to_pass = _resolve_names_from_caller_frame(variables) + result = await agent_or_config.invoke( + task, + thread_id=f"supervisor_conversational_{agent_name}", + variables=vars_to_pass if vars_to_pass else None, + ) + if ( + hasattr(result, "variables") + and result.variables + and adapter._shared_vm_ref[0] is not None + ): + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import ( + VariableBridge, + ) + + bridged = VariableBridge.bridge( + result.variables, + adapter._shared_vm_ref[0], + description_prefix=f"from {agent_name}", + ) + if bridged: + logger.info( + "Bridged %d variable(s) from %s: %s", len(bridged), agent_name, bridged + ) + return result.answer if hasattr(result, "answer") else str(result) + + if isinstance(agent_or_config, dict) and agent_or_config.get("type") == "external": + a2a_config = agent_or_config.get("config", {}).get("a2a_protocol", {}) + endpoint = a2a_config.get("endpoint") + transport = a2a_config.get("transport", "http") + + if agent_card is not None and HAS_A2A_SDK and transport == "http": + vars_to_pass = {} + if pass_variables_a2a and variables is not None: + vars_to_pass = _resolve_names_from_caller_frame(variables) + result = await delegate_task_via_a2a_sdk( + agent_card, + task, + auth=a2a_config.get("auth"), + timeout=float(a2a_config.get("timeout", 30)), + variables=vars_to_pass if vars_to_pass else None, + ) + return result.get("result", "") + else: + a2a_protocol = A2AProtocol(endpoint=endpoint, transport=transport) + await a2a_protocol.connect() + try: + vars_to_pass = {} + if variables is not None: + vars_to_pass = _resolve_names_from_caller_frame(variables) + result = await a2a_protocol.delegate_task( + target_agent=agent_name, + task=task, + context={"thread_id": None}, + variables=vars_to_pass, + ) + return result.get("result", "") + finally: + await a2a_protocol.disconnect() + + return f"Error: Unknown agent 
type for {agent_name}" + + return delegate_to_agent + + async def prepare_agents_and_prompt( + state: CugaSupervisorState, config: Optional[RunnableConfig] = None + ) -> Command: + logger.info("Preparing agents and prompt for supervisor conversational mode") + + if settings.policy.enabled and not ToolApprovalHandler.should_skip_policy_check(adapter, state): + from cuga.backend.cuga_graph.policy.enactment import PolicyEnactment + from cuga.backend.cuga_graph.policy.models import PolicyType + + policy_command, policy_metadata = await PolicyEnactment.check_and_enact( + state, + config, + policy_types=[ + PolicyType.INTENT_GUARD, + PolicyType.PLAYBOOK, + PolicyType.TOOL_GUIDE, + ], + adapter=adapter, + ) + if policy_command: + return policy_command + if policy_metadata: + adapter.set_metadata(state, policy_metadata) + + from cuga.backend.cuga_graph.nodes.cuga_supervisor.a2a_protocol import ( + HAS_A2A_SDK, + _agent_card_description, + fetch_agent_card, + format_agent_card_for_prompt, + ) + from cuga.sdk import CugaAgent + + agent_list = [] + agent_tools_for_prompt = [] + pass_variables_a2a = getattr(settings.supervisor, "pass_variables_a2a", False) + + for agent_name, agent_or_config in adapter._agents.items(): + agent_card = None + if isinstance(agent_or_config, CugaAgent): + agent_type = "internal" + description = getattr(agent_or_config, "description", f"Internal agent: {agent_name}") + elif isinstance(agent_or_config, dict): + agent_type = agent_or_config.get("type", "external") + a2a_cfg = agent_or_config.get("config", {}).get("a2a_protocol", {}) + if agent_type == "external" and HAS_A2A_SDK and a2a_cfg.get("transport") == "http": + endpoint = a2a_cfg.get("endpoint") + if endpoint: + try: + agent_card = await fetch_agent_card( + endpoint, + auth=a2a_cfg.get("auth"), + timeout=float(a2a_cfg.get("timeout", 30)), + ) + description = _agent_card_description(agent_card) + except Exception as e: + logger.warning(f"Failed to fetch A2A agent card for {agent_name}: {e}") + 
description = agent_or_config.get( + "description", f"External agent: {agent_name}" + ) + else: + description = agent_or_config.get("description", f"External agent: {agent_name}") + else: + description = agent_or_config.get("description", f"{agent_type} agent: {agent_name}") + else: + agent_type = "unknown" + description = f"Agent: {agent_name}" + + agent_entry = {"name": agent_name, "type": agent_type, "description": description} + if agent_card is not None: + agent_entry["agent_card"] = format_agent_card_for_prompt(agent_card) + agent_list.append(agent_entry) + + tool_name = f"delegate_to_{agent_name}" + tool_func = _create_agent_delegation_func(agent_name, agent_or_config, agent_card=agent_card) + adapter._agent_tools_context[tool_name] = tool_func + + is_a2a_agent = agent_card is not None + if is_a2a_agent and pass_variables_a2a: + tool_info = { + "name": tool_name, + "description": ( + f"Delegate a task to the {agent_name} agent. {description} " + "Variables are passed in request metadata." + ), + "params_str": "task: str, variables: Optional[List[str]] = None", + "params_doc": ( + f"- task (str): The task description to send to {agent_name}\n" + f"- variables (Optional[List[str]]): Variable names to pass in A2A metadata" + ), + "response_doc": f"Returns the result from {agent_name}.", + } + elif is_a2a_agent: + tool_info = { + "name": tool_name, + "description": f"Delegate a task to {agent_name}. {description}", + "params_str": "task: str", + "params_doc": f"- task (str): The task description to send to {agent_name}.", + "response_doc": f"Returns the result from {agent_name}.", + } + else: + tool_info = { + "name": tool_name, + "description": ( + f"Delegate a task to the {agent_name} agent. 
" + f"This agent specializes in: {description}" + ), + "params_str": "task: str, variables: Optional[List[str]] = None", + "params_doc": ( + f"- task (str): The task description to delegate to {agent_name}\n" + f"- variables (Optional[List[str]]): List of variable names to pass" + ), + "response_doc": f"Returns the result from {agent_name} agent execution.", + } + agent_tools_for_prompt.append(tool_info) + + from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import create_update_todos_tool + from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import make_tool_awaitable + + todos_tool = await create_update_todos_tool() + adapter._agent_tools_context["create_update_todos"] = make_tool_awaitable(todos_tool.func) + agent_tools_for_prompt.append( + { + "name": "create_update_todos", + "description": todos_tool.description, + "params_str": "todos: List[Dict[str, str]]", + "params_doc": ( + "todos: List of todo items, each with 'text' and 'status' ('pending' or 'completed')" + ), + "response_doc": "Returns the current list of todos with their status.", + } + ) + + _cfg = config.get("configurable", {}) if config else {} + _runtime_thread_id = _cfg.get("thread_id") or state.thread_id + _runtime_backends = resolve_runtime_backends(settings, _cfg) + _runtime_bundle = build_runtime_tools(thread_id=_runtime_thread_id, backends=_runtime_backends) + adapter._agent_tools_context.update(_runtime_bundle.execution_callables) + agent_tools_for_prompt.extend(prompt_tool_dicts(_runtime_bundle.prompt_tools)) + + _skills_section = "" + if getattr(settings.skills, "enabled", False): + from cuga.backend.skills import ( + SkillRegistry, + create_skill_tools, + discover_skills, + format_available_skills_block, + ) + + _cuga_folder = os.getenv("CUGA_FOLDER", settings.policy.cuga_folder) + _skill_entries = discover_skills(_cuga_folder) + if _skill_entries: + _skill_registry = SkillRegistry(_skill_entries) + _skill_tools = create_skill_tools(_skill_registry) + for _st in 
_skill_tools: + _tool_func = ( + _st.coroutine if getattr(_st, "coroutine", None) else getattr(_st, "func", None) + ) + if _tool_func: + adapter._agent_tools_context[_st.name] = make_tool_awaitable(_tool_func) + agent_tools_for_prompt.extend(prompt_tool_dicts(_skill_tools)) + _skills_section = format_available_skills_block(_skill_registry) + logger.info(f"Supervisor: loaded {len(_skill_entries)} skill(s)") + + if adapter._tool_provider is not None: + try: + _provider_tools = await adapter._tool_provider.get_all_tools() + for _pt in _provider_tools: + _pt_func = ( + _pt.coroutine if getattr(_pt, "coroutine", None) else getattr(_pt, "func", None) + ) + if _pt_func: + adapter._agent_tools_context[_pt.name] = make_tool_awaitable(_pt_func) + agent_tools_for_prompt.extend(prompt_tool_dicts(_provider_tools)) + logger.info(f"Supervisor: loaded {len(_provider_tools)} tool(s) from tool_provider") + except Exception as _e: + logger.warning(f"Supervisor: failed to load tools from tool_provider: {_e}") + + _split_note = split_execution_note(ExecutionRouter.resolve(settings)) + _effective_special_instructions = ( + "\n\n".join(filter(None, [adapter._special_instructions, _skills_section, _split_note])) + or None + ) + + is_autonomous_subtask = state.sub_task is not None and state.sub_task.strip() != "" + + from jinja2 import Template + + template = Template(_prompt_template_str) + dynamic_prompt = template.render( + base_prompt=None, + agents=agent_list, + tools=agent_tools_for_prompt, + is_autonomous_subtask=is_autonomous_subtask, + instructions=_instructions, + enable_todos=True, + special_instructions=_effective_special_instructions, + ) + + return Command( + goto="call_model", + update={ + "tools_prepared": True, + "prepared_prompt": dynamic_prompt, + "step_count": 0, + "available_agents": { + name: AgentInfo( + name=name, type=info["type"], description=info["description"] + ).model_dump() + for name, info in zip([a["name"] for a in agent_list], agent_list) + }, + }, + ) + + 
return prepare_agents_and_prompt + + def build_execute_node(self) -> Callable: + """Return the ``execute_agent_tool`` async node function.""" + from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_state import ( + CugaSupervisorState, + ) + from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler + from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor + from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( + append_chat_messages_with_step_limit as _core_append, + create_error_command as _core_create_error, + execution_output_text, + ) + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ExecutionRouter + from langchain_core.runnables import RunnableConfig + + adapter = self + + def _append(state, new_msgs): + return _core_append(adapter, state, new_msgs) + + def _create_error(updated_messages, error_message, step_count, additional_updates=None): + return _core_create_error( + adapter, updated_messages, error_message, step_count, additional_updates + ) + + async def execute_agent_tool(state: CugaSupervisorState, config: Optional[RunnableConfig] = None): + logger.info("Supervisor conversational: executing agent delegation code") + + if settings.policy.enabled: + denial_command = ToolApprovalHandler.handle_denial(adapter, state) + if denial_command: + return denial_command + + existing_vars = {} + var_manager = adapter.get_variable_manager(state) + for var_name in var_manager.get_variable_names(): + existing_vars[var_name] = var_manager.get_variable(var_name) + + adapter._shared_vm_ref[0] = var_manager + + context = {**existing_vars, **adapter._agent_tools_context} + + try: + _exec_plan = ExecutionRouter.resolve(settings) + if _exec_plan.split_execution_active: + logger.info( + "Supervisor split execution: python=%s shell=%s fs=%s", + _exec_plan.python_backend, + _exec_plan.shell_backend, + _exec_plan.filesystem_backend, + ) + output, new_vars = await 
CodeExecutor.eval_with_tools_async( + code=state.script, + _locals=context, + state=state, + thread_id=state.thread_id, + apps_list=None, + variable_manager=adapter.get_variable_manager(state), + plan=_exec_plan, + ) + + logger.debug(f"Execution output: {output.strip()[:500]}...") + + for name, value in new_vars.items(): + var_manager.add_variable( + value, name=name, description="Created during agent delegation execution" + ) + + execution_message_content = execution_output_text(output) + new_message = HumanMessage(content=execution_message_content) + updated_messages, error_message = _append(state, [new_message]) + + if error_message: + return _create_error( + updated_messages, + error_message, + state.step_count, + additional_updates={"supervisor_variables": state.supervisor_variables}, + ) + + return { + "supervisor_chat_messages": updated_messages, + "supervisor_variables": state.supervisor_variables, + "step_count": state.step_count + 1, + } + except Exception as e: + error_msg = f"Error during execution: {str(e)}" + logger.error(error_msg, exc_info=True) + new_message = HumanMessage(content=error_msg) + updated_messages, limit_error_message = _append(state, [new_message]) + + if limit_error_message: + return _create_error(updated_messages, limit_error_message, state.step_count) + + return { + "supervisor_chat_messages": updated_messages, + "error": error_msg, + "execution_complete": True, + "step_count": state.step_count + 1, + } + + return execute_agent_tool diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/tests/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/tests/test_supervisor_graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/tests/test_supervisor_graph_adapter.py new file mode 100644 index 00000000..50cfc71c --- /dev/null +++ 
b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/tests/test_supervisor_graph_adapter.py @@ -0,0 +1,159 @@ +"""Phase 5 — SupervisorGraphAdapter hook correctness tests. + +Pins the hook overrides that SupervisorGraphAdapter contributes to the shared +call_model node and the node-factory contract: + +1. Class-level attributes (messages_key, execute_node_name, etc.) +2. State-reading hooks: get_messages, get_variable_manager, get_variables_storage +3. Step-limit hook: resolve_max_steps with override / state / default +4. Factory methods: build_prepare_node and build_execute_node return async callables +""" + +from __future__ import annotations + +import asyncio +from types import SimpleNamespace +from unittest.mock import MagicMock + + +def _get_adapter_class(): + from cuga.backend.cuga_graph.nodes.cuga_supervisor.supervisor_graph_adapter import ( + SupervisorGraphAdapter, + ) + + return SupervisorGraphAdapter + + +def _make_adapter(**kwargs): + cls = _get_adapter_class() + return cls( + agents=kwargs.get("agents", {}), + special_instructions=kwargs.get("special_instructions", None), + tool_provider=kwargs.get("tool_provider", None), + ) + + +# ── 1. Class-level attributes ────────────────────────────────────────────── + + +def test_messages_key_is_supervisor_chat_messages(): + cls = _get_adapter_class() + assert cls.messages_key == "supervisor_chat_messages" + + +def test_execute_node_name_is_execute_agent_tool(): + cls = _get_adapter_class() + assert cls.execute_node_name == "execute_agent_tool" + + +def test_metadata_key_is_supervisor_metadata(): + cls = _get_adapter_class() + assert cls.metadata_key == "supervisor_metadata" + + +def test_sender_name_is_cuga_supervisor(): + cls = _get_adapter_class() + assert cls.sender_name == "CugaSupervisor" + + +# ── 2. 
get_messages hook ─────────────────────────────────────────────────── + + +def test_get_messages_returns_supervisor_chat_messages(): + adapter = _make_adapter() + msgs = [MagicMock()] + state = SimpleNamespace(supervisor_chat_messages=msgs) + assert adapter.get_messages(state) is msgs + + +def test_get_messages_returns_empty_list_when_none(): + adapter = _make_adapter() + state = SimpleNamespace(supervisor_chat_messages=None) + assert adapter.get_messages(state) == [] + + +# ── 3. resolve_max_steps hook ────────────────────────────────────────────── + + +def test_resolve_max_steps_uses_override_when_given(): + adapter = _make_adapter() + state = SimpleNamespace(cuga_lite_max_steps=None) + assert adapter.resolve_max_steps(state, 20) == 20 + + +def test_resolve_max_steps_uses_state_when_set(): + adapter = _make_adapter() + state = SimpleNamespace(cuga_lite_max_steps=30) + assert adapter.resolve_max_steps(state, None) == 30 + + +def test_resolve_max_steps_falls_back_to_settings_default(): + adapter = _make_adapter() + state = SimpleNamespace(cuga_lite_max_steps=None) + result = adapter.resolve_max_steps(state, None) + assert isinstance(result, int) + assert result > 0 + + +# ── 4. get_variable_manager hook ────────────────────────────────────────── + + +def test_get_variable_manager_returns_supervisor_variables_manager(): + adapter = _make_adapter() + sentinel = object() + state = SimpleNamespace(supervisor_variables_manager=sentinel) + assert adapter.get_variable_manager(state) is sentinel + + +# ── 5. 
get_variables_storage hook ───────────────────────────────────────── + + +def test_get_variables_storage_returns_supervisor_variables(): + adapter = _make_adapter() + storage = {"x": {"value": 1}} + state = SimpleNamespace(supervisor_variables=storage) + assert adapter.get_variables_storage(state) is storage + + +def test_get_variables_storage_returns_none_when_missing(): + adapter = _make_adapter() + state = SimpleNamespace() + assert adapter.get_variables_storage(state) is None + + +# ── 6. Factory methods return async callables ────────────────────────────── + + +def test_build_prepare_node_returns_async_callable(): + adapter = _make_adapter() + node = adapter.build_prepare_node() + assert callable(node) + assert asyncio.iscoroutinefunction(node) + + +def test_build_execute_node_returns_async_callable(): + adapter = _make_adapter() + node = adapter.build_execute_node() + assert callable(node) + assert asyncio.iscoroutinefunction(node) + + +# ── 7. resolve_names_from_caller_frame helper ────────────────────────────── + + +def test_resolve_names_from_caller_frame_is_accessible(): + from cuga.backend.cuga_graph.nodes.cuga_supervisor.supervisor_graph_adapter import ( + _resolve_names_from_caller_frame, + ) + + assert callable(_resolve_names_from_caller_frame) + + +def test_resolve_names_from_caller_frame_resolves_locals(): + from cuga.backend.cuga_graph.nodes.cuga_supervisor.supervisor_graph_adapter import ( + _resolve_names_from_caller_frame, + ) + + my_local_var = 42 # noqa: F841 — intentionally referenced by name below + resolved = _resolve_names_from_caller_frame(["my_local_var"]) + assert resolved.get("my_local_var") == 42 diff --git a/src/cuga/backend/cuga_graph/policy/enactment.py b/src/cuga/backend/cuga_graph/policy/enactment.py index 6c8f77c5..5aa1d913 100644 --- a/src/cuga/backend/cuga_graph/policy/enactment.py +++ b/src/cuga/backend/cuga_graph/policy/enactment.py @@ -29,6 +29,7 @@ async def check_and_enact( state: AgentState, config: 
Optional[RunnableConfig] = None, policy_types: Optional[List[PolicyType]] = None, + adapter: Any = None, ) -> tuple[Optional[Command], Optional[Dict[str, Any]]]: """ Check for applicable policies and return enactment command or metadata. @@ -103,7 +104,7 @@ async def check_and_enact( f"Policy matched: {policy_match.policy.name} (action: {policy_match.action.action_type})" ) command, metadata = await PolicyEnactment._enact_policy_action( - state, policy_match, policy_system, context + state, policy_match, policy_system, context, adapter ) # ALWAYS apply Tool Guide policies (merge metadata from all matches) @@ -183,7 +184,11 @@ def _merge_guide_metadata(guide_matches: List[PolicyMatch]) -> Dict[str, Any]: @staticmethod async def _enact_policy_action( - state: Any, policy_match: PolicyMatch, policy_system: PolicyConfigurable, context: Any + state: Any, + policy_match: PolicyMatch, + policy_system: PolicyConfigurable, + context: Any, + adapter: Any = None, ) -> tuple[Optional[Command], Optional[Dict[str, Any]]]: """ Enact a specific policy action. @@ -200,7 +205,7 @@ async def _enact_policy_action( action_type = policy_match.action.action_type if action_type == PolicyActionType.BLOCK_INTENT: - return PolicyEnactment._enact_block_intent(state, policy_match) + return PolicyEnactment._enact_block_intent(state, policy_match, adapter) elif action_type == PolicyActionType.GUIDE_PROMPT: return await PolicyEnactment._enact_guide_prompt(state, policy_match, policy_system, context) @@ -228,7 +233,9 @@ async def _enact_policy_action( return None, None @staticmethod - def _enact_block_intent(state: Any, policy_match: PolicyMatch) -> tuple[Command, None]: + def _enact_block_intent( + state: Any, policy_match: PolicyMatch, adapter: Any = None + ) -> tuple[Command, None]: """ Block the intent and return immediately with guard response. 
@@ -243,14 +250,25 @@ def _enact_block_intent(state: Any, policy_match: PolicyMatch) -> tuple[Command, blocked_message = AIMessage(content=policy_match.action.content) + # adapter=None preserves the exact legacy Lite literals (back-compat + # for the output-formatter caller and any non-adapter path). + if adapter is not None: + base_messages = adapter.get_messages(state) + messages_key = adapter.messages_key + metadata_key = adapter.metadata_key + else: + base_messages = state.chat_messages + messages_key = "chat_messages" + metadata_key = "cuga_lite_metadata" + return ( Command( goto=END, update={ - "chat_messages": state.chat_messages + [blocked_message], + messages_key: base_messages + [blocked_message], "final_answer": policy_match.action.content, "execution_complete": True, - "cuga_lite_metadata": { + metadata_key: { "policy_blocked": True, "policy_id": policy_match.policy.id, "policy_name": policy_match.policy.name, diff --git a/src/cuga/config.py b/src/cuga/config.py index 357d0a7d..2018bb37 100644 --- a/src/cuga/config.py +++ b/src/cuga/config.py @@ -180,6 +180,12 @@ def get_all_paths(config, parent_key=""): Validator("auth.iam_proxy_ca_bundle", default=""), Validator("auth.role_token_source", default="auto"), Validator("skills.enabled", default=False), + # Phase 6: explicit execution axes — override advanced_features when set. + # None (default) means "read from advanced_features" (full backward-compat). 
+ Validator("execution.python_backend", default=None), + Validator("execution.shell_backend", default=None), + Validator("execution.filesystem_backend", default=None), + Validator("execution.workspace_root", default=None), Validator("advanced_features.builtin_tools", default=["knowledge"]), Validator("advanced_features.cuga_lite_bind_tools_tool_names", default=[]), # Evolve integration diff --git a/src/cuga/sdk.py b/src/cuga/sdk.py index 94df8879..399137e2 100644 --- a/src/cuga/sdk.py +++ b/src/cuga/sdk.py @@ -130,6 +130,10 @@ class InvokeResult(BaseModel): ) thread_id: str = Field(default="", description="Thread ID used for this invocation") error: Optional[str] = Field(default=None, description="Error message if execution failed") + variables: Dict[str, Any] = Field( + default_factory=dict, + description="Variables computed by the sub-agent, bridged to the Supervisor's namespace", + ) def __str__(self) -> str: """Return the answer when converting to string for backward compatibility.""" @@ -1803,11 +1807,16 @@ async def invoke( # Get tool calls from result (only if tracking was enabled) tool_calls = result.get("tool_calls", []) if track_tool_calls else [] + from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + + _hitl_variables = VariableBridge.extract_values(result.get("variables_storage", {}) or {}) + return InvokeResult( answer=final_answer, tool_calls=tool_calls, thread_id=thread_id, error=error_msg, + variables=_hitl_variables, ) # Normal invocation case @@ -1937,6 +1946,12 @@ async def invoke( # Get tool calls from result (only if tracking was enabled) tool_calls = result.get("tool_calls", []) if track_tool_calls else [] + + # Extract sub-agent variables for VariableBridge (Phase 8). 
+ from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + + _result_variables = VariableBridge.extract_values(result.get("variables_storage", {}) or {}) + if settings.advanced_features.benchmark == "appworld": llm_model = llm_manager.get_model(settings.agent.final_answer.model) appworld_plain = getattr(settings.advanced_features, "appworld_final_answer_plain", False) @@ -1974,6 +1989,7 @@ async def invoke( tool_calls=tool_calls, thread_id=thread_id, error=error_msg, + variables=_result_variables, ) async def stream( From d6b188042517b21b10b3c3d49fa77d169b18f711 Mon Sep 17 00:00:00 2001 From: Sami Marreed Date: Wed, 20 May 2026 19:57:53 +0300 Subject: [PATCH 2/7] fix: add missing tests --- .../nodes/cuga_agent_core/shared_graph.py | 6 ++-- .../tests/test_shared_graph_builder.py | 33 +++++++++++++++++++ src/scripts/run_tests.sh | 10 ++++-- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_graph.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_graph.py index 20b795cd..9d1110fe 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_graph.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_graph.py @@ -2,7 +2,7 @@ ``build_agent_graph`` wires the canonical 3-node agent graph structure: - START → prepare → call_model ↔ execute (loop) → END + START → prepare --Command--> call_model ↔ execute (loop) → END Both CugaLite and CugaSupervisor share this structure. The nodes themselves are provided by the caller (produced by adapter factories), so the graph @@ -54,6 +54,8 @@ def build_agent_graph( graph.add_node(adapter.execute_node_name, execute_node) graph.add_edge(START, "prepare") - graph.add_edge("prepare", "call_model") + # prepare returns Command(goto=...) — no static edge (avoids call_model after BLOCK_INTENT). + # Execute node returns a state update (not Command); loop back for the NL answer. 
+ graph.add_edge(adapter.execute_node_name, "call_model") return graph diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_graph_builder.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_graph_builder.py index 4234eda5..8a2dbb52 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_graph_builder.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_graph_builder.py @@ -100,6 +100,39 @@ def test_built_graph_has_prepare_and_call_model_nodes(): assert "sandbox" in node_names # adapter.execute_node_name +def test_built_graph_has_execute_to_call_model_edge(): + """Sandbox must loop back to call_model after code execution (regression guard).""" + build = _get_builder() + adapter = _MinimalAdapter() + + graph = build( + adapter=adapter, + state_class=_MockState, + prepare_node=_noop_node(), + call_model_node=_noop_node(), + execute_node=_noop_node(), + ) + + edges = graph.edges + assert ("sandbox", "call_model") in edges + + +def test_built_graph_has_no_prepare_to_call_model_edge(): + """prepare routes via Command only — static edge would run call_model after BLOCK_INTENT.""" + build = _get_builder() + adapter = _MinimalAdapter() + + graph = build( + adapter=adapter, + state_class=_MockState, + prepare_node=_noop_node(), + call_model_node=_noop_node(), + execute_node=_noop_node(), + ) + + assert ("prepare", "call_model") not in graph.edges + + # ── 3. 
Graph has expected node names (Supervisor-like) ──────────────────── diff --git a/src/scripts/run_tests.sh b/src/scripts/run_tests.sh index 1ced2c81..b7724c51 100755 --- a/src/scripts/run_tests.sh +++ b/src/scripts/run_tests.sh @@ -6,7 +6,7 @@ if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then echo "" echo "Test runner script:" echo " (no args) Run default tests (registry + e2e + stability tests)" - echo " unit_tests Run unit tests only (registry + variables manager + sandbox + E2B lite + knowledge)" + echo " unit_tests Run unit tests only (registry + agent-core + cuga-lite + supervisor + knowledge)" echo " --skip-stability Run all tests except stability tests" echo " --help, -h Show this help message" echo "" @@ -73,8 +73,12 @@ run_pytest ./src/cuga/backend/tools_env/registry/mcp_manager/tests/ run_pytest ./src/cuga/backend/cuga_graph/nodes/api/variables_manager/tests/ run_pytest_with_e2b ./src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/ run_pytest ./src/cuga/backend/cuga_graph/nodes/api/code_agent/tests/ -echo "Running memory tests..." -run_pytest ./src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/memory/ +echo "Running agent-core tests..." +run_pytest ./src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/ +echo "Running cuga-lite tests..." +run_pytest ./src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/ +echo "Running supervisor tests..." +run_pytest ./src/cuga/backend/cuga_graph/nodes/cuga_supervisor/tests/ echo "Running knowledge tests..." 
run_pytest \ tests/unit/test_knowledge_engine.py \ From 030acc849b49dbf06fae3492fcb6c13ee1edc5d6 Mon Sep 17 00:00:00 2001 From: Sami Marreed Date: Wed, 20 May 2026 20:21:39 +0300 Subject: [PATCH 3/7] fix: address missing test --- src/scripts/run_tests.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/src/scripts/run_tests.sh b/src/scripts/run_tests.sh index b7724c51..6c572bed 100755 --- a/src/scripts/run_tests.sh +++ b/src/scripts/run_tests.sh @@ -72,7 +72,6 @@ run_pytest ./src/cuga/backend/tools_env/registry/tests/ run_pytest ./src/cuga/backend/tools_env/registry/mcp_manager/tests/ run_pytest ./src/cuga/backend/cuga_graph/nodes/api/variables_manager/tests/ run_pytest_with_e2b ./src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/ -run_pytest ./src/cuga/backend/cuga_graph/nodes/api/code_agent/tests/ echo "Running agent-core tests..." run_pytest ./src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/ echo "Running cuga-lite tests..." From 65219e8ceb2639586c0e1a17679482fd81e5e979 Mon Sep 17 00:00:00 2001 From: Sami Marreed Date: Wed, 20 May 2026 22:06:31 +0300 Subject: [PATCH 4/7] chore: untrack .claude/settings.json and add to .gitignore [skip ci] - Remove settings.json from git tracking (git rm --cached) - Add .claude/settings.json to .gitignore alongside existing settings.local.json entry --- .claude/settings.json | 9 --------- .gitignore | 1 + 2 files changed, 1 insertion(+), 9 deletions(-) delete mode 100644 .claude/settings.json diff --git a/.claude/settings.json b/.claude/settings.json deleted file mode 100644 index 16f8e2f6..00000000 --- a/.claude/settings.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(python -m pytest src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_runtime_tools.py::test_skill_tool_is_prompt_dict_compatible src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_runtime_tools.py::test_skill_tool_func_is_awaitable_via_make_tool_awaitable -v)", - "Bash(python -m pytest 
src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/ src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/ -q)", - "Bash(python -m *)" - ] - } -} diff --git a/.gitignore b/.gitignore index 7601915b..6670e120 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ node_modules .pnpm-store/ .agents *.log +.claude/settings.json .claude/settings.local.json /frontend/dist/ src/frontend_workspaces/frontend/dist/ From fa90144422bcc0b98448a2add6777026bd3106f5 Mon Sep 17 00:00:00 2001 From: Sami Marreed Date: Thu, 21 May 2026 15:53:25 +0300 Subject: [PATCH 5/7] =?UTF-8?q?refactor(cuga-lite):=20thin=20cuga=5Flite?= =?UTF-8?q?=5Fgraph.py=20=E2=80=94=20move=20all=20logic=20into=20AgentGrap?= =?UTF-8?q?hAdapter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cuga_lite_graph.py goes from ~1800 lines to 214: state class, loop adapter, two thin wrappers, and the wiring factory. All node logic (prepare, sandbox, bind-tools helpers, find-tools helpers, todos, knowledge, reflection) now lives in AgentGraphAdapter, matching the pattern already used by cuga_supervisor_graph.py / SupervisorGraphAdapter. Also fixes 7 CodeRabbit review items from PR #250: - graph_nodes.py: use model_copy(update=...) 
on HumanMessage to preserve id/additional_kwargs - shared_nodes.py: same model_copy fix; Loguru %d/%s placeholders -> {} - tool_approval_handler.py: handle_denial clears user_approved from metadata in state update - cuga_supervisor_graph.py: remove dead _resolve_names_from_caller_frame + inspect import - supervisor_graph_adapter.py: guard var_manager None; fix create_update_todos_tool import source --- .../nodes/cuga_agent_core/graph_nodes.py | 4 +- .../nodes/cuga_agent_core/shared_nodes.py | 6 +- .../tests/test_tool_approval_adapter.py | 1 + .../nodes/cuga_lite/agent_graph_adapter.py | 1559 ++++++++++++++- .../nodes/cuga_lite/cuga_lite_graph.py | 1709 +---------------- .../nodes/cuga_lite/tool_approval_handler.py | 3 + .../cuga_supervisor/cuga_supervisor_graph.py | 24 - .../supervisor_graph_adapter.py | 19 +- 8 files changed, 1593 insertions(+), 1732 deletions(-) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph_nodes.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph_nodes.py index 58489b5a..dbe0cf0a 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph_nodes.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph_nodes.py @@ -254,7 +254,9 @@ def inject_playbook_guidance( result = list(messages) original = result[last_human_idx] - result[last_human_idx] = HumanMessage(content=f"{original.content}\n\n## Task Guidance\n{guidance}") + result[last_human_idx] = original.model_copy( + update={"content": f"{original.content}\n\n## Task Guidance\n{guidance}"} + ) return result diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_nodes.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_nodes.py index a79b10e1..d96b27b0 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_nodes.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_nodes.py @@ -146,7 +146,7 @@ async def call_model(state: Any, config: Any = None) -> Command: content = content + variables_addendum 
modified = True - modified_messages.append(HumanMessage(content=content) if modified else msg) + modified_messages.append(msg.model_copy(update={"content": content}) if modified else msg) messages_for_model.append({"role": "user", "content": content}) elif is_ai: @@ -156,10 +156,10 @@ async def call_model(state: Any, config: Any = None) -> Command: else: modified_messages.append(msg) - logger.warning("call_model: skipping message %d with unknown role: %s", i, msg_role) + logger.warning("call_model: skipping message {} with unknown role: {}", i, msg_role) logger.info( - "call_model: %d messages → model (%s)", + "call_model: {} messages → model ({})", len(messages_for_model), adapter.sender_name, ) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_tool_approval_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_tool_approval_adapter.py index 65ead1b1..fa7b657a 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_tool_approval_adapter.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_tool_approval_adapter.py @@ -120,6 +120,7 @@ def test_handle_denial_lite_shape(): "execution_complete": True, "final_answer": "Execution cancelled by user.", "step_count": 5, + "cuga_lite_metadata": {}, } assert ToolApprovalHandler.handle_denial(a, SimpleNamespace(cuga_lite_metadata={})) is None diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py index 70b89d84..92cca7bc 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py @@ -17,22 +17,752 @@ from __future__ import annotations import json -from typing import Any, Dict, List, Optional, Tuple +import os +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Tuple -from langchain_core.messages import BaseMessage +from 
langchain_core.exceptions import OutputParserException +from langchain_core.language_models import BaseChatModel +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage +from langchain_core.runnables import RunnableConfig +from langchain_core.tools import StructuredTool +from langgraph.types import Command from loguru import logger +from pydantic import BaseModel, Field -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter +from cuga.backend.activity_tracker.tracker import Step +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( + CoreGraphAdapter, + append_chat_messages_with_step_limit as _core_append_with_step_limit, + create_error_command as _core_create_error_command, + execution_output_text, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import make_tool_awaitable +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ( + ExecutionRouter, + split_execution_note, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.runtime_tools import ( + build_runtime_tools, + resolve_runtime_backends, +) +from cuga.backend.cuga_graph.nodes.cuga_lite.executors.code_executor import ( + CodeExecutor, + is_find_tools_listing_markdown, +) +from cuga.backend.cuga_graph.nodes.cuga_lite.model_runtime_profile import ( + resolve_bind_tools_fields, + resolved_runtime_model_name, +) from cuga.backend.cuga_graph.nodes.cuga_lite.nl_auto_continue_classifier import ( classify_nl_auto_continue, normalize_assistant_text, ) +from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import ( + PromptUtils, + create_mcp_prompt, + format_apps_for_prompt, + normalize_mcp_few_shot_examples, + resolve_cuga_lite_few_shots_enabled, +) +from cuga.backend.cuga_graph.nodes.cuga_lite.reflection.reflection import reflection_task +from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler +from 
cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.task_decomposition_planning.analyze_task import TaskAnalyzer +from cuga.backend.cuga_graph.policy.enactment import PolicyEnactment from cuga.backend.llm.errors import extract_code_from_tool_use_failed +from cuga.backend.llm.models import LLMManager +from cuga.backend.skills import ( + SkillRegistry, + create_skill_tools, + discover_skills, + format_available_skills_block, +) from cuga.config import settings +_llm_manager = LLMManager() + # ── Helpers (moved from cuga_lite_graph.py) ──────────────────────────────── +# ── Bind-tools helpers (Task 1) ──────────────────────────────────────────── + + +def _bind_tools_mode_from_settings() -> str: + try: + m = getattr(settings.advanced_features, "cuga_lite_bind_tools_mode", None) + if m is not None and str(m).strip(): + return str(m).strip().lower() + except Exception: + pass + return "none" + + +def _bind_tools_apps_from_settings(): + try: + raw = getattr(settings.advanced_features, "cuga_lite_bind_tools_apps", None) + if raw is None: + return [] + if isinstance(raw, str): + return [raw.strip()] if raw.strip() else [] + if isinstance(raw, (list, tuple)): + return [str(x).strip() for x in raw if str(x).strip()] + except Exception: + pass + return [] + + +def _bind_tools_tool_names_from_settings(): + try: + raw = getattr(settings.advanced_features, "cuga_lite_bind_tools_tool_names", None) + if raw is None: + return [] + if isinstance(raw, str): + return [raw.strip()] if raw.strip() else [] + if isinstance(raw, (list, tuple)): + return [str(x).strip() for x in raw if str(x).strip()] + except Exception: + pass + return [] + + +def _bind_include_find_tools_from_config(cfg: Dict[str, Any]) -> bool: + v = cfg.get("cuga_lite_bind_tools_include_find_tools") + if v is None: + try: + v = getattr(settings.advanced_features, "cuga_lite_bind_tools_include_find_tools", False) + except Exception: + v = False + 
if isinstance(v, bool): + return v + if isinstance(v, str): + return v.strip().lower() in ("true", "1", "yes", "on") + return bool(v) + + +def _merge_find_tools_into_bound( + bound: List[StructuredTool], + seen: Set[str], + *, + include_find_tools: bool, + tools_context_ref: Optional[Dict[str, Any]], +) -> None: + if not include_find_tools: + return + ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if not ft: + return + name = getattr(ft, "name", None) or "" + if name and name not in seen: + seen.add(name) + bound.append(ft) + + +async def _indexed_provider_tools_first_wins( + tool_provider: ToolProviderInterface, +) -> Dict[str, StructuredTool]: + """Map tool name → StructuredTool using provider.get_all_tools (first occurrence wins).""" + try: + all_tools = await tool_provider.get_all_tools() + except Exception as e: + logger.warning("bind_tools: get_all_tools failed: %s", e) + return {} + by_name: Dict[str, StructuredTool] = {} + duplicates: Set[str] = set() + for t in all_tools or []: + n = getattr(t, "name", None) or "" + if not n: + continue + if n in by_name: + duplicates.add(n) + continue + by_name[n] = t + if duplicates: + logger.debug( + "bind_tools: duplicate tool names from provider (using first): %s", + sorted(duplicates), + ) + return by_name + + +async def _indexed_tools_for_native_bind( + tool_provider: ToolProviderInterface, + tools_context_ref: Optional[Dict[str, Any]], +) -> Dict[str, StructuredTool]: + """Registry MCP tools plus in-graph overlays (skills, OpenSandbox shell, todos, find_tools). + + ``run_command`` / ``write_file`` / etc. are not registered with ToolRegistryProvider; prepare + copies them onto ``tools_for_prompt`` only. Overlay must merge so ``cuga_lite_bind_tools_tool_names`` + can bind them by name. 
+ """ + by_name = await _indexed_provider_tools_first_wins(tool_provider) + overlay = (tools_context_ref or {}).get("_lc_bind_tools_overlay_structured_tools") or [] + if not overlay: + return by_name + for t in overlay: + n = getattr(t, "name", None) or "" + if not n: + continue + by_name[n] = t + return by_name + + +async def resolve_model_with_bind_tools( + active_model: BaseChatModel, + *, + configurable: Optional[Dict[str, Any]], + tools_context_ref: Optional[Dict[str, Any]], + tool_provider: Optional[ToolProviderInterface], + model_name: Optional[str] = None, +) -> BaseChatModel: + """Optionally wrap ``active_model`` with ``bind_tools`` for native tool-calling tests. + + LangGraph ``config['configurable']`` overrides per-model runtime profile overrides TOML: + + - ``cuga_lite_bind_tools_mode``: ``none`` | ``find_tools`` | ``all`` | ``apps`` | ``tools`` | ``apps_and_tools`` + - ``cuga_lite_bind_tools_apps``: list of app names (``mode=apps`` or ``apps_and_tools``) + - ``cuga_lite_bind_tools_tool_names``: StructuredTool ``name`` values (``mode=tools`` or ``apps_and_tools``) + - ``cuga_lite_bind_tools_include_find_tools``: merge ``find_tools`` into ``all`` / ``apps`` / ``tools`` / ``apps_and_tools`` + + Profile ``gpt-oss-20b``: see ``model_runtime_profile.GPT_OSS_20B_RUNTIME_DEFAULTS``. 
+ """ + cfg = configurable or {} + mn = (model_name or "").strip() + if not mn: + mn = resolved_runtime_model_name( + configurable_llm=cfg.get("llm"), + graph_default_model=active_model, + ) + mode, app_names, tool_names, include_find_tools = resolve_bind_tools_fields( + configurable, + mn, + settings_mode_fn=_bind_tools_mode_from_settings, + settings_apps_fn=_bind_tools_apps_from_settings, + settings_tool_names_fn=_bind_tools_tool_names_from_settings, + settings_include_fn=lambda: _bind_include_find_tools_from_config({}), + ) + + if mode in ("", "none", "false", "0", "off"): + if include_find_tools: + ft_only = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if ft_only: + return active_model.bind_tools([ft_only]) + return active_model + + try: + if mode == "find_tools": + ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if ft: + return active_model.bind_tools([ft]) + logger.debug( + "cuga_lite_bind_tools_mode=find_tools but find_tools StructuredTool is missing " + "(shortlisting may be off)" + ) + return active_model + + if mode == "all": + if not tool_provider: + logger.warning("cuga_lite_bind_tools_mode=all but tool_provider is missing") + return active_model + by_name = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) + bound = list(by_name.values()) + seen: Set[str] = {n for n in by_name} + _merge_find_tools_into_bound( + bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref + ) + if not bound: + return active_model + return active_model.bind_tools(bound) + + if mode == "apps_and_tools": + if not tool_provider: + logger.warning("cuga_lite_bind_tools_mode=apps_and_tools but tool_provider is missing") + return active_model + if not app_names and not tool_names: + if include_find_tools: + ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if ft: + return active_model.bind_tools([ft]) + logger.warning( + "cuga_lite_bind_tools_mode=apps_and_tools but 
cuga_lite_bind_tools_apps and " + "cuga_lite_bind_tools_tool_names are both empty " + "(set include_find_tools to bind find_tools only)" + ) + return active_model + + bound: List[StructuredTool] = [] + seen_names: Set[str] = set() + for app_name in app_names: + try: + for t in await tool_provider.get_tools(app_name): + name = getattr(t, "name", None) or "" + if name and name not in seen_names: + seen_names.add(name) + bound.append(t) + except Exception as e: + logger.warning("bind_tools apps_and_tools: get_tools(%s) failed: %s", app_name, e) + + by_name_lookup: Dict[str, StructuredTool] = {} + if tool_names: + by_name_lookup = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) + + missing: List[str] = [] + if tool_names: + for tn in tool_names: + if tn in seen_names: + continue + t = by_name_lookup.get(tn) + if t is None: + missing.append(tn) + continue + seen_names.add(tn) + bound.append(t) + if missing: + logger.warning( + "cuga_lite_bind_tools_tool_names not found among provider tools (skipped): %s", + missing, + ) + + _merge_find_tools_into_bound( + bound, seen_names, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref + ) + if not bound: + return active_model + return active_model.bind_tools(bound) + + if mode == "apps": + if not app_names: + if include_find_tools: + ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if ft: + return active_model.bind_tools([ft]) + logger.warning( + "cuga_lite_bind_tools_mode=apps but cuga_lite_bind_tools_apps is empty " + "(set include_find_tools to bind find_tools only)" + ) + return active_model + if not tool_provider: + logger.warning("cuga_lite_bind_tools_mode=apps but tool_provider is missing") + return active_model + + bound = [] + seen: Set[str] = set() + for app_name in app_names: + try: + for t in await tool_provider.get_tools(app_name): + name = getattr(t, "name", None) or "" + if name and name not in seen: + seen.add(name) + bound.append(t) + except Exception 
as e: + logger.warning("bind_tools apps: get_tools(%s) failed: %s", app_name, e) + _merge_find_tools_into_bound( + bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref + ) + if not bound: + return active_model + return active_model.bind_tools(bound) + + if mode == "tools": + if not tool_names: + if include_find_tools: + ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if ft: + return active_model.bind_tools([ft]) + logger.warning( + "cuga_lite_bind_tools_mode=tools but cuga_lite_bind_tools_tool_names is empty " + "(set include_find_tools to bind find_tools only)" + ) + return active_model + if not tool_provider: + logger.warning("cuga_lite_bind_tools_mode=tools but tool_provider is missing") + return active_model + by_name = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) + if not by_name: + return active_model + bound = [] + seen: Set[str] = set() + missing: List[str] = [] + for tn in tool_names: + t = by_name.get(tn) + if t is None: + missing.append(tn) + continue + if tn not in seen: + seen.add(tn) + bound.append(t) + if missing: + logger.warning( + "cuga_lite_bind_tools_tool_names not found among provider tools (skipped): %s", + missing, + ) + _merge_find_tools_into_bound( + bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref + ) + if not bound: + return active_model + return active_model.bind_tools(bound) + + logger.warning( + "Unknown cuga_lite_bind_tools_mode: %s (use none|find_tools|all|apps|tools|apps_and_tools)", + mode, + ) + except Exception as e: + logger.warning("resolve_model_with_bind_tools failed: %s", e) + return active_model + + +# ── Find-tools helpers (Task 2) ──────────────────────────────────────────── + +_BUNDLED_FIND_TOOLS_FEW_SHOT_JSON = ( + Path(__file__).resolve().parent / "prompts" / "find_tools_few_shot_examples.json" +) + + +def _first_user_message_text(chat_messages: Optional[List[BaseMessage]]) -> Optional[str]: + if not 
chat_messages: + return None + for msg in chat_messages: + if isinstance(msg, HumanMessage): + raw = msg.content + text = raw.strip() if isinstance(raw, str) else str(raw).strip() + return text or None + return None + + +def _compose_find_tools_shortlister_query(query: str, initial_user_message: Optional[str]) -> str: + q = query.strip() + init = (initial_user_message or "").strip() + if not init: + return q + return f"Query: {q}\nTask context (initial user message): {init}" + + +def _web_search_enabled() -> bool: + return bool(getattr(settings.advanced_features, "enable_web_search", False)) + + +def _ensure_web_app(apps: List[Any], all_apps: List[Any]) -> List[Any]: + if not _web_search_enabled() or any(getattr(app, "name", None) == "web" for app in apps): + return apps + web_app = next((app for app in all_apps if getattr(app, "name", None) == "web"), None) + if web_app: + return [*apps, web_app] + return apps + + +async def create_find_tools_tool( + all_tools, + all_apps: List[Any], + app_to_tools_map: Optional[Dict[str, List[StructuredTool]]] = None, + llm: Optional[Any] = None, + initial_user_message: Optional[str] = None, +) -> StructuredTool: + """Create a find_tools StructuredTool for tool discovery. + + Args: + all_tools: All available tools to search through + all_apps: All available app definitions + app_to_tools_map: Optional mapping of app_name -> list of tools. If provided, used for filtering by app_name. + initial_user_message: First human message in the session; combined with the tool `query` for shortlisting. + + Returns: + StructuredTool configured for finding relevant tools + """ + + async def find_tools_func(query: str, app_name: str): + """Search for relevant tools from the connected applications based on a natural language query. 
+ + Args: + query: Natural language query describing what tools are needed to accomplish the task can include also which parameters are needed or the output expected + app_name: Name of a specific app to filter tools from. Only searches tools from that app. + + Returns: + Top 4 matching tools with their details + """ + if app_to_tools_map and app_name in app_to_tools_map: + filtered_tools = app_to_tools_map[app_name] + else: + logger.warning( + f"App '{app_name}' not found in app_to_tools_map. Available apps: {list(app_to_tools_map.keys()) if app_to_tools_map else 'N/A'}" + ) + filtered_tools = [] + + filtered_apps = [app for app in all_apps if hasattr(app, 'name') and app.name == app_name] + + if not filtered_apps: + logger.warning( + f"App '{app_name}' not found in available apps. Available apps: {[app.name if hasattr(app, 'name') else str(app) for app in all_apps]}" + ) + + shortlister_query = _compose_find_tools_shortlister_query(query, initial_user_message) + + try: + return await PromptUtils.find_tools( + query=shortlister_query, all_tools=filtered_tools, all_apps=filtered_apps, llm=llm + ) + except OutputParserException as e: + logger.bind( + query_len=len(shortlister_query), + error_type=type(e).__name__, + ).opt(exception=True).warning( + "Tool shortlisting failed due to parser error; returning error to agent" + ) + return ( + f"Tool shortlisting failed due to malformed response: {e}. " + "Please retry with a different query." + ) + except Exception as e: + logger.bind( + query_len=len(shortlister_query), + error_type=type(e).__name__, + ).opt(exception=True).warning("Tool shortlisting failed unexpectedly; returning error to agent") + return ( + f"Tool shortlisting failed due to an internal error: {e}. " + "Please retry with a different query." + ) + + return StructuredTool.from_function( + func=find_tools_func, + name="find_tools", + description="Search for relevant tools from a specific connected application based on a natural language query. 
Use this when you need to discover what tools are available for a specific task within a specific application.", + ) + + +def _resolve_find_tools_few_shot_json_path() -> Optional[Path]: + if _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON.is_file(): + return _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON + return None + + +def _load_default_find_tools_few_shot_examples() -> List[Dict[str, str]]: + from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import normalize_mcp_few_shot_examples + + path = _resolve_find_tools_few_shot_json_path() + if path is None: + logger.debug( + "Find-tools few-shot JSON not found (expected packaged %s or repo samples copy); skipping", + _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON, + ) + return [] + try: + import json as _json + + raw = _json.loads(path.read_text(encoding="utf-8")) + normalized = normalize_mcp_few_shot_examples(raw) + if normalized: + logger.info(f"Loaded {len(normalized)} find_tools MCP few-shot turn(s) from {path}") + return normalized + except (OSError, _json.JSONDecodeError) as e: + logger.warning(f"Could not load find_tools few-shot JSON from {path}: {e}") + return [] + + +# ── Knowledge helpers (Task 4b) ─────────────────────────────────────────── + + +def _get_knowledge_tool_scope_context( + engine: Any | None, + thread_id: str | None, +) -> tuple[tuple[str, ...], str | None]: + config = getattr(engine, "_config", None) if engine else None + if not config or not getattr(config, "enabled", False): + return (), None + + scopes: list[str] = [] + if getattr(config, "agent_level_enabled", True): + scopes.append("agent") + if getattr(config, "session_level_enabled", True) and thread_id: + scopes.append("session") + + default_scope = "agent" if "agent" in scopes else scopes[0] if scopes else None + return tuple(scopes), default_scope + + +def _knowledge_scope_instruction(allowed_scopes: tuple[str, ...], thread_id: str | None) -> str: + if allowed_scopes == ("agent",): + return ( + "Knowledge scope rules for this run: only agent-level knowledge is 
available. " + "Never call `knowledge_*` tools with `scope=\"session\"`." + ) + if allowed_scopes == ("session",): + return ( + "Knowledge scope rules for this run: only session-level knowledge is available. " + "Never call `knowledge_*` tools with `scope=\"agent\"`. The conversation thread context is injected automatically." + ) + if allowed_scopes == ("agent", "session"): + return ( + "Knowledge scope rules for this run: both knowledge scopes are available. " + "Use `scope=\"agent\"` for permanent agent documents and `scope=\"session\"` for this conversation's documents." + ) + if thread_id: + return "Knowledge tools are unavailable in this run. Do not call any `knowledge_*` tool." + return ( + "Knowledge tools are unavailable in this run. " + "Session scope cannot be used here because there is no conversation thread context." + ) + + +def _decorate_knowledge_tool(tool: Any, allowed_scopes: tuple[str, ...], thread_id: str | None) -> None: + """Add a brief scope hint to the tool description. + + The full scope rules are already in the system instructions, so we only + add a short reminder here to avoid bloating the prompt with repeated text. + """ + base_description = getattr(tool, "description", "") or "Knowledge tool" + scopes_str = ", ".join(f'"{s}"' for s in allowed_scopes) + hint = f"Allowed scopes: {scopes_str}. See knowledge scope rules in instructions above." 
+ tool.description = f"{base_description}\n\n{hint}".strip() + + +# ── Reflection helper (Task 5a) ──────────────────────────────────────────── + + +def _reflection_current_task(state: Any) -> str: + """Prefer ``sub_task``; else last user message that is not sandbox ``Execution output`` feedback.""" + if (state.sub_task or "").strip(): + return state.sub_task.strip() + if state.chat_messages: + execution_prefix = "Execution output:" + for msg in reversed(state.chat_messages): + if isinstance(msg, HumanMessage): + c = (msg.content or "").strip() + if c and not c.startswith(execution_prefix): + return c + return "" + + +# ── Todos helpers (Task 3) ───────────────────────────────────────────────── + + +class Todo(BaseModel): + """A single todo item with text and status.""" + + text: str = Field(..., description="The task description") + status: str = Field( + default="pending", + description="Status of the todo: 'pending', 'in_progress', or 'completed'", + ) + + +class TodosInput(BaseModel): + """Input schema for create_update_todos function.""" + + todos: List[Todo] = Field(..., description="List of todos, each with 'text' and 'status' fields") + + +class TodosOutput(BaseModel): + """Output schema for create_update_todos function.""" + + todos: List[Todo] = Field(..., description="List of todos with their current status") + + +def _try_parse_todos_payload(value: Any) -> Optional[List[Dict[str, Any]]]: + if not isinstance(value, dict) or "todos" not in value: + return None + raw = value["todos"] + if not isinstance(raw, list): + return None + if not raw: + return [] + if not all(isinstance(x, dict) and "text" in x and "status" in x for x in raw): + return None + return raw + + +def extract_task_todos_from_new_vars(new_vars: dict) -> Optional[List[Dict[str, Any]]]: + for val in new_vars.values(): + parsed = _try_parse_todos_payload(val) + if parsed is not None: + return parsed + return None + + +def _serialize_todos_for_store(todos_list: List[Any]) -> List[Dict[str, 
str]]: + out: List[Dict[str, str]] = [] + for t in todos_list: + if isinstance(t, Todo): + out.append({"text": t.text, "status": t.status}) + elif hasattr(t, "model_dump"): + d = t.model_dump() + out.append({"text": str(d.get("text", "")), "status": str(d.get("status", "pending"))}) + elif isinstance(t, dict): + out.append({"text": str(t.get("text", "")), "status": str(t.get("status", "pending"))}) + else: + out.append({"text": str(t), "status": "pending"}) + return out + + +async def create_update_todos_tool( + agent_state: Optional[Any] = None, + todos_store_ref: Optional[List[Dict[str, str]]] = None, +) -> StructuredTool: + """Create a create_update_todos StructuredTool for managing task todos. + + Args: + agent_state: Optional AgentState (reserved for future use) + todos_store_ref: Mutable list shared with the graph; latest todos are written here for the system prompt. + + Returns: + StructuredTool configured for creating and updating todos + """ + + async def create_update_todos_func(todos: Any) -> TodosOutput: + """Create or update a list of todos for complex multi-step tasks. + + Use this tool when you have a complex task that requires multiple steps. + This helps you track progress and organize your work. + + Args: + todos: List of todo dicts/models (matches ``TodosInput.todos`` / tool schema). + + Returns: + Short confirmation only (full list is shown in the system prompt via todos_store_ref). 
+ """ + input_data = todos + # Handle different input types + if isinstance(input_data, TodosInput): + todos_list = input_data.todos + elif isinstance(input_data, dict): + # If it's a dict, check if it has 'todos' key + if 'todos' in input_data: + todos_list = input_data['todos'] + else: + # If no 'todos' key, treat the whole dict as a single todo or wrap it + todos_list = [input_data] + # Convert dict items to Todo models + todos_list = [Todo(**todo) if isinstance(todo, dict) else todo for todo in todos_list] + elif isinstance(input_data, list): + # If it's a list directly, convert each item to Todo + todos_list = [Todo(**todo) if isinstance(todo, dict) else todo for todo in input_data] + else: + # Fallback: try to create TodosInput + try: + if isinstance(input_data, dict): + input_data = TodosInput(**input_data) + else: + input_data = TodosInput(todos=input_data) + todos_list = input_data.todos + except Exception: + # Last resort: wrap in a list + todos_list = [Todo(**input_data) if isinstance(input_data, dict) else input_data] + + if todos_store_ref is not None: + serialized = _serialize_todos_for_store(todos_list) + todos_store_ref.clear() + todos_store_ref.extend(serialized) + + normalized = [t if isinstance(t, Todo) else Todo(**t) for t in todos_list] + return TodosOutput(todos=normalized) + + return StructuredTool.from_function( + func=create_update_todos_func, + name="create_update_todos", + description="Create or update a list of todos for complex multi-step tasks. Pass `todos` as a list of objects with 'text' and 'status' ('pending', 'in_progress', or 'completed'). 
Returns a todos payload; the full list is shown in the system prompt under 'Current task todos' (Current Plan).", + args_schema=TodosInput, + return_direct=False, + ) + def _clean_empty_response_retry_meta(meta: Optional[Dict[str, Any]]) -> Dict[str, Any]: m = {**(meta or {})} @@ -128,12 +858,26 @@ def __init__( task_todos_ref: List[Dict[str, str]], tools_context_ref: Optional[Dict[str, Any]], base_tool_provider: Any, + model: Any = None, + prompt_template: Any = None, + instructions: Any = None, + special_instructions: Any = None, + tools_context: Optional[Dict[str, Any]] = None, + static_prompt: Any = None, + thread_id: Any = None, ) -> None: self._tracker = tracker self._base_callbacks = base_callbacks or [] self._task_todos_ref = task_todos_ref self._tools_context_ref = tools_context_ref self._base_tool_provider = base_tool_provider + self._model = model + self._prompt_template = prompt_template + self._instructions = instructions + self._special_instructions = special_instructions + self._tools_context = tools_context if tools_context is not None else {} + self._static_prompt = static_prompt + self._thread_id = thread_id # ── Abstract method implementations ─────────────────────────────────── @@ -191,10 +935,6 @@ class _FakeResponse: raise async def resolve_bind_tools(self, state: Any, active_model: Any, configurable: dict) -> Any: - from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import ( - resolve_model_with_bind_tools, - ) - try: return await resolve_model_with_bind_tools( active_model, @@ -204,7 +944,7 @@ async def resolve_bind_tools(self, state: Any, active_model: Any, configurable: ) except Exception as exc: logger.warning("AgentGraphAdapter.resolve_bind_tools failed: %s", exc) - return None + return None # ── Post-invocation hook overrides ──────────────────────────────────── @@ -222,8 +962,6 @@ def normalize_response(self, response: Any) -> Tuple[str, Optional[str]]: def on_response_processed(self, state: Any, code: Optional[str], content: 
str) -> None: try: - from cuga.backend.activity_tracker.tracker import Step - self._tracker.collect_step(step=Step(name="Raw_Assistant_Response", data=content)) if code: self._tracker.collect_step(step=Step(name="Assistant_code", data=content)) @@ -242,3 +980,804 @@ async def classify_auto_continue( self, state: Any, model: Any, content: str, reasoning: Optional[str] ) -> bool: return await classify_nl_auto_continue(model, content, reasoning) + + # ── Node factory methods (Tasks 4c and 5b) ──────────────────────────── + + def build_prepare_node(self, lc_bind_tools_meta: dict): + """Return the prepare_tools_and_apps async node.""" + + async def prepare_tools_and_apps(state: Any, config: Optional[RunnableConfig] = None) -> Command: + """Prepare tools, apps, and prompt once at the start of the graph. + + This node gets tools from tool_provider, filters based on state configuration, + determines if find_tools should be enabled, and prepares the prompt. + Tools are available via closure (per graph instance), prompt is stored in state. + + enable_todos is read from config["configurable"] at runtime. + + Optional configurable key ``mcp_few_shot_examples``: overrides few-shots—a JSON string or + list of dicts with ``role`` and ``content``. If absent (or explicitly ``None``) and + ``find_tools`` is enabled, ``prompts/find_tools_few_shot_examples.json`` (bundled next to the + MCP template) is loaded, with optional fallback to repo ``samples/cuga_lite/mcp_few_shot_examples.json``. + Bundled few-shots only apply when ``find_tools`` shortlisting is active + (``total_tool_count > shortlisting_tool_threshold``, see settings configurable). + + Disable few-shots entirely via ``advanced_features.cuga_lite_enable_few_shots`` in settings.toml + or ``cuga_lite_enable_few_shots`` in configurable (skips prefix chat few-shots). 
+ """ + configurable = config.get("configurable", {}) if config else {} + enable_todos = ( + configurable.get("enable_todos") + if "enable_todos" in configurable + else settings.advanced_features.enable_todos + ) + shortlisting_threshold = ( + configurable.get("shortlisting_tool_threshold") + if "shortlisting_tool_threshold" in configurable + else settings.advanced_features.shortlisting_tool_threshold + ) + _runtime_model_name = resolved_runtime_model_name( + configurable_llm=configurable.get("llm"), + graph_default_model=self._model, + ) + few_shots_enabled = resolve_cuga_lite_few_shots_enabled( + configurable, + model_name=_runtime_model_name, + ) + logger.debug( + f"[APPROVAL DEBUG] prepare_tools_and_apps received cuga_lite_metadata: {state.cuga_lite_metadata}" + ) + + # Skip policy checking if policies are disabled or if we're returning from approval + if settings.policy.enabled and not ToolApprovalHandler.should_skip_policy_check(self, state): + # Check for policies and enact if matched + # Include IntentGuard, Playbook, and ToolGuide for intent checks + from cuga.backend.cuga_graph.policy.models import PolicyType + + command, metadata = await PolicyEnactment.check_and_enact( + state, + config, + policy_types=[PolicyType.INTENT_GUARD, PolicyType.PLAYBOOK, PolicyType.TOOL_GUIDE], + adapter=self, + ) + + # If policy returned a command (e.g., BLOCK_INTENT), execute it immediately + if command: + return command + + # If policy returned metadata (e.g., playbook guidance), store it + if metadata: + self.set_metadata(state, metadata) + elif not settings.policy.enabled: + logger.debug("Policy system disabled - skipping policy checks") + else: + logger.info("[APPROVAL DEBUG] Skipping policy check - user has already approved") + + if not self._base_tool_provider: + raise ValueError("tool_provider is required") + + # Get total tool count across ALL apps (for shortlisting threshold - not per app) + all_tools_total = await self._base_tool_provider.get_all_tools() + 
total_tool_count = len(all_tools_total) if all_tools_total else 0 + + # Get tools from provider + apps_for_prompt = None + app_to_tools_map = {} + + # Get apps from state and filter tools if specific app is selected + if state.sub_task_app: + # Specific app selected - filter tools to only this app + all_apps = await self._base_tool_provider.get_apps() + # add here the implementation of force_ + force_lite_apps = getattr(settings.advanced_features, 'force_lite_mode_apps', []) + if force_lite_apps: + allowed_apps_names = list(set([state.sub_task_app] + force_lite_apps)) + if _web_search_enabled(): + allowed_apps_names.append("web") + # call authenticate_apps for the allowed apps + if settings.advanced_features.benchmark == "appworld": + await TaskAnalyzer.call_authenticate_apps(force_lite_apps) + apps_for_prompt = [app for app in all_apps if app.name in allowed_apps_names] + else: + apps_for_prompt = [app for app in all_apps if app.name == state.sub_task_app] + apps_for_prompt = _ensure_web_app(apps_for_prompt, all_apps) + # Get only tools for this specific app + tools_for_execution = [] + for app in apps_for_prompt: + current_tools_for_execution = await self._base_tool_provider.get_tools(app.name) + app_to_tools_map[app.name] = current_tools_for_execution + tools_for_execution.extend(current_tools_for_execution) + + logger.info( + f"Filtered to {len(tools_for_execution)} tools for {len(apps_for_prompt)} identified apps" + ) + elif state.api_intent_relevant_apps: + # Filter to API apps + all_apps = await self._base_tool_provider.get_apps() + apps_for_prompt = [ + app + for app in state.api_intent_relevant_apps + if hasattr(app, 'type') and app.type == 'api' + ] + apps_for_prompt = _ensure_web_app(apps_for_prompt, all_apps) + # Get tools only for the identified apps + tools_for_execution = [] + for app in apps_for_prompt: + app_tools = await self._base_tool_provider.get_tools(app.name) + app_to_tools_map[app.name] = app_tools + tools_for_execution.extend(app_tools) + 
logger.info( + f"Filtered to {len(tools_for_execution)} tools for {len(apps_for_prompt)} identified apps" + ) + else: + # Get all tools and apps + all_apps = await self._base_tool_provider.get_apps() + apps_for_prompt = all_apps + tools_for_execution = all_tools_total or [] + # Build mapping for all apps + for app in apps_for_prompt: + app_tools = await self._base_tool_provider.get_tools(app.name) + app_to_tools_map[app.name] = app_tools + + enable_find_tools = total_tool_count > shortlisting_threshold or _web_search_enabled() + + if enable_find_tools: + logger.info( + f"Auto-enabling find_tools: total {total_tool_count} tools (across all apps) exceeds threshold of {shortlisting_threshold}" + ) + + # Prepare prompt + is_autonomous_subtask = state.sub_task is not None and state.sub_task.strip() != "" + + # TODO: Add task loaded from file support this happens when we load file as playboook + task_loaded_from_file = False # Not used in current flow + + # Prepare tools for prompt - if find_tools enabled, only expose find_tools + tools_for_prompt = tools_for_execution + if enable_find_tools: + active_model = configurable.get("llm") + find_tool = await create_find_tools_tool( + all_tools=tools_for_execution, + all_apps=apps_for_prompt, + app_to_tools_map=app_to_tools_map, + llm=active_model, + initial_user_message=_first_user_message_text(state.chat_messages), + ) + tools_for_prompt = [find_tool] + # Add find_tools to tools context for sandbox execution + # Wrap to make awaitable (agent always uses await) + # Prefer coroutine over func to avoid run_in_executor issues + find_tool_func = ( + find_tool.coroutine + if hasattr(find_tool, 'coroutine') and find_tool.coroutine + else find_tool.func + ) + self._tools_context['find_tools'] = make_tool_awaitable(find_tool_func) + if lc_bind_tools_meta is not None: + lc_bind_tools_meta["_lc_bind_tools_find_tools"] = find_tool + logger.info( + "Exposing only find_tools in prompt (all tools + find_tools available in execution 
context)" + ) + + if few_shots_enabled: + if "mcp_few_shot_examples" in configurable: + raw_fs = configurable["mcp_few_shot_examples"] + if raw_fs is not None: + few_shot_examples = normalize_mcp_few_shot_examples(raw_fs) + elif enable_find_tools: + few_shot_examples = _load_default_find_tools_few_shot_examples() + else: + few_shot_examples = [] + elif enable_find_tools: + few_shot_examples = _load_default_find_tools_few_shot_examples() + else: + few_shot_examples = [] + logger.debug( + "Bundled MCP few-shots (prompts/find_tools_few_shot_examples.json) not loaded: find_tools " + "is off " + f"(total_tool_count={total_tool_count} <= shortlisting_tool_threshold=" + f"{shortlisting_threshold}). Lower the threshold via configurable or add apps/tools." + ) + else: + few_shot_examples = [] + logger.debug("MCP few-shots disabled (cuga_lite_enable_few_shots=false)") + if few_shot_examples: + logger.debug(f"MCP few-shot examples: {len(few_shot_examples)} turns") + + # Add create_update_todos tool for complex task management if enabled + if enable_todos: + todos_tool = await create_update_todos_tool( + agent_state=state, todos_store_ref=self._task_todos_ref + ) + tools_for_prompt.append(todos_tool) + # Add to tools context for sandbox execution + # Prefer coroutine over func to avoid run_in_executor issues + todos_tool_func = ( + todos_tool.coroutine + if hasattr(todos_tool, 'coroutine') and todos_tool.coroutine + else todos_tool.func + ) + self._tools_context['create_update_todos'] = make_tool_awaitable(todos_tool_func) + + # Apply tool guide if guides exist in metadata and haven't been applied yet + # Guides should apply regardless of whether a playbook matched + if settings.policy.enabled and state.cuga_lite_metadata: + # Check if guides exist (either as separate guides list or legacy format) + has_guides = ( + state.cuga_lite_metadata.get("guides") + or state.cuga_lite_metadata.get("guide_content") + or state.cuga_lite_metadata.get("policy_type") == "tool_guide" + or 
state.cuga_lite_metadata.get("has_guides", False) + ) + + if has_guides: + tools_for_execution = PolicyEnactment.apply_tool_guide( + tools_for_execution, state.cuga_lite_metadata + ) + tools_for_prompt = PolicyEnactment.apply_tool_guide( + tools_for_prompt, state.cuga_lite_metadata + ) + # Mark guides as applied to prevent re-application + state.cuga_lite_metadata["guides_applied"] = True + logger.info("Applied tool guide from policy") + else: + logger.debug("No tool guides found in metadata") + + skill_tools = [] + skills_prompt_section = "" + skills_enabled = False + configurable_special = ( + (config or {}).get("configurable", {}).get("special_instructions") if config else None + ) + effective_special = self._special_instructions or configurable_special or "" + skills_cfg_on = getattr(settings.skills, "enabled", False) + cuga_folder_for_skills = os.getenv("CUGA_FOLDER", settings.policy.cuga_folder) + if skills_cfg_on: + skill_entries = discover_skills(cuga_folder_for_skills) + if skill_entries: + skill_registry = SkillRegistry(skill_entries) + skill_tools = create_skill_tools(skill_registry) + tools_for_prompt.extend(skill_tools) + skills_prompt_section = format_available_skills_block(skill_registry) + skills_enabled = True + logger.info( + f"Loaded {len(skill_entries)} agent skill(s) from .agents/skills and " + f"~/.config/agents/skills with legacy {cuga_folder_for_skills}/skills and " + "~/.config/cuga/skills fallbacks" + ) + + # Resolve thread_id early for per-thread workspace selection. + _cfg_for_thread = config.get("configurable", {}) if config else {} + _runtime_thread_id_for_fs = _cfg_for_thread.get("thread_id") or state.thread_id or self._thread_id + + # Update tools context with all execution tools. + # Wrap to make awaitable (agent always uses await). Filesystem path + # rewriting is no longer needed here — filesystem tools come from + # the consolidated runtime class below, not from MCP. 
+ for tool in tools_for_execution: + # Extract tool function - StructuredTool may use .func, .coroutine, or ._run + # IMPORTANT: Prefer coroutine over func to avoid run_in_executor issues + # with tools that have async implementations (like MCP tools) + tool_func = None + if hasattr(tool, 'coroutine') and tool.coroutine: + # Prefer async coroutine - avoids run_in_executor timeout issues + tool_func = tool.coroutine + elif hasattr(tool, 'func') and tool.func: + tool_func = tool.func + else: + tool_func = getattr(tool, '_run', None) + + if tool_func: + self._tools_context[tool.name] = make_tool_awaitable(tool_func) + else: + logger.warning(f"Tool '{tool.name}' has no callable function, skipping") + + for tool in skill_tools: + tool_func = None + if hasattr(tool, "coroutine") and tool.coroutine: + tool_func = tool.coroutine + elif hasattr(tool, "func") and tool.func: + tool_func = tool.func + else: + tool_func = getattr(tool, "_run", None) + if tool_func: + self._tools_context[tool.name] = make_tool_awaitable(tool_func) + else: + logger.warning(f"Skill tool '{tool.name}' has no callable, skipping") + + # Inject the consolidated filesystem tools + run_command via the + # shared runtime_tools orchestrator. Backend selection and gating + # live in cuga_agent_core (behavior-identical to the previous + # inline block); filesystem and run_command remain independently + # gated by enable_filesystem_tools / enable_shell_tool. 
+ _runtime_backends = resolve_runtime_backends(settings, configurable) + + if _runtime_backends.filesystem != "none" or _runtime_backends.shell != "none": + cfg = config.get("configurable", {}) if config else {} + runtime_thread_id = ( + cfg["thread_id"] if "thread_id" in cfg else (state.thread_id or self._thread_id) + ) + else: + runtime_thread_id = None + + _runtime_bundle = build_runtime_tools(thread_id=runtime_thread_id, backends=_runtime_backends) + self._tools_context.update(_runtime_bundle.execution_callables) + tools_for_prompt.extend(_runtime_bundle.prompt_tools) + if _runtime_bundle.app_definitions and apps_for_prompt is not None: + apps_for_prompt = list(apps_for_prompt) + _runtime_bundle.app_definitions + + from cuga.backend.evolve.memory import build_evolve_special_instructions_extension + + special_instructions_final = effective_special or "" + _split_note = split_execution_note(ExecutionRouter.resolve(settings)) + if _split_note: + special_instructions_final = (special_instructions_final + "\n\n" + _split_note).strip() + evolve_extension = await build_evolve_special_instructions_extension( + state=state, + configurable=configurable, + timeout=settings.evolve.timeout, + ) + if evolve_extension: + special_instructions_final = (special_instructions_final or "") + evolve_extension + + cfg = config.get("configurable", {}) if config else {} + _thread_id = cfg.get("thread_id") or "" + _knowledge_engine = cfg.get("knowledge_engine") + if _knowledge_engine is None: + try: + from cuga.backend.server.main import app as _app + + _app_state = getattr(_app.state, "app_state", None) + _knowledge_engine = getattr(_app_state, "knowledge_engine", None) if _app_state else None + except Exception: + _knowledge_engine = None + + allowed_knowledge_scopes, default_knowledge_scope = _get_knowledge_tool_scope_context( + _knowledge_engine, + _thread_id or None, + ) + + knowledge_tool_names = { + tool.name + for tool in tools_for_execution + if getattr(tool, "name", 
"").startswith("knowledge_") + } + + if knowledge_tool_names and not allowed_knowledge_scopes: + tools_for_execution = [ + tool + for tool in tools_for_execution + if getattr(tool, "name", "") not in knowledge_tool_names + ] + tools_for_prompt = [ + tool for tool in tools_for_prompt if getattr(tool, "name", "") not in knowledge_tool_names + ] + apps_for_prompt = [ + app for app in (apps_for_prompt or []) if getattr(app, "name", "") != "knowledge" + ] + for tool_name in knowledge_tool_names: + self._tools_context.pop(tool_name, None) + elif knowledge_tool_names: + if _thread_id: + logger.debug("Knowledge tools: thread context available for session scope injection") + + def _wrap_knowledge_tool(fn, tid, allowed_scopes, default_scope): + async def _wrapped(*args, **kwargs): + scope = kwargs.get("scope") + if scope is None and default_scope: + kwargs["scope"] = default_scope + scope = default_scope + if scope is not None and scope not in allowed_scopes: + allowed_text = ", ".join(allowed_scopes) + return { + "error": ( + f"Knowledge scope '{scope}' is unavailable in this context. " + f"Allowed scopes: {allowed_text}" + ) + } + if tid and "session" in allowed_scopes: + kwargs.setdefault("thread_id", tid) + return await fn(*args, **kwargs) + + _wrapped.__doc__ = getattr(fn, "__doc__", None) + _wrapped._knowledge_allowed_scopes = allowed_scopes + _wrapped._knowledge_default_scope = default_scope + _wrapped._knowledge_thread_id = tid + return _wrapped + + for tool_name in knowledge_tool_names: + original_fn = self._tools_context.get(tool_name) + if original_fn: + self._tools_context[tool_name] = _wrap_knowledge_tool( + original_fn, + _thread_id, + allowed_knowledge_scopes, + default_knowledge_scope, + ) + + # Note: scope rules are injected once via effective_instructions. + # No per-tool decoration needed — avoids repeated text in prompt. 
+ + # Inject knowledge base awareness if knowledge tools are available + effective_instructions = self._instructions + # Detect knowledge tools — works for both registry (app named + # "knowledge") and SDK mode (tools under "runtime_tools") + has_knowledge_tools = any( + getattr(app, "name", "") == "knowledge" for app in (apps_for_prompt or []) + ) + if not has_knowledge_tools and tools_for_execution: + has_knowledge_tools = any( + getattr(t, "name", "").startswith("knowledge_") for t in tools_for_execution + ) + knowledge_scope_instruction = _knowledge_scope_instruction( + allowed_knowledge_scopes, + _thread_id or None, + ) + if knowledge_tool_names: + effective_instructions = ( + f"{knowledge_scope_instruction}\n\n{effective_instructions}" + if effective_instructions + else knowledge_scope_instruction + ) + if has_knowledge_tools: + try: + from cuga.backend.knowledge.awareness import ( + get_knowledge_summary, + format_knowledge_context, + get_engine_from_app_state, + ) + + cfg = config.get("configurable", {}) + engine = cfg.get("knowledge_engine") or get_engine_from_app_state() + # Get agent_id: configurable > app_state > fallback + agent_id = cfg.get("agent_id") + knowledge_config_hash = cfg.get("knowledge_config_hash") + if not agent_id: + try: + from cuga.backend.server.main import app as _app + + _as = getattr(_app.state, "app_state", None) + agent_id = getattr(_as, "agent_id", None) if _as else None + if knowledge_config_hash is None: + knowledge_config_hash = ( + getattr(_as, "knowledge_config_hash", None) if _as else None + ) + except Exception: + pass + if not agent_id: + agent_id = "cuga-default" + awareness_thread_id = cfg.get("thread_id") + kb_ctx = format_knowledge_context( + agent_id, + awareness_thread_id, + engine=engine, + agent_config_hash=knowledge_config_hash, + ) + logger.info( + f"Knowledge awareness: agent_id={agent_id}, thread_id={awareness_thread_id}, " + f"agent_collection={kb_ctx.get('agent_collection')}, " + 
f"session_collection={kb_ctx.get('session_collection')}" + ) + + if not engine: + logger.warning("Knowledge awareness skipped: engine not available") + else: + # Use draft knowledge config for search-time params when running + # in draft mode (Try-It-Out). Published agent always uses engine config. + _search_cfg = engine._config + _is_draft = agent_id and agent_id.endswith("--draft") + if _is_draft: + try: + from cuga.backend.server.main import app as _app + + _das = getattr(_app.state, "draft_app_state", None) + _draft_kc = getattr(_das, "draft_knowledge_config", None) if _das else None + if _draft_kc: + _search_cfg = _draft_kc + except Exception: + pass + knowledge_block = await get_knowledge_summary( + engine, + agent_collection=kb_ctx.get("agent_collection"), + session_collection=kb_ctx.get("session_collection"), + max_search_attempts=getattr(_search_cfg, "max_search_attempts", None) + or getattr(engine._config, "max_search_attempts", None), + default_limit=getattr(_search_cfg, "default_limit", None) + or getattr(engine._config, "default_limit", None), + rag_profile=getattr(_search_cfg, "rag_profile", None) + or getattr(engine._config, "rag_profile", "standard"), + ) + if knowledge_block: + # Load knowledge search instructions from dedicated file + knowledge_instructions_text = "" + try: + kb_instructions_path = ( + Path(__file__).parents[4] + / "configurations" + / "knowledge" + / "knowledge_instructions.md" + ) + if kb_instructions_path.exists(): + knowledge_instructions_text = kb_instructions_path.read_text( + encoding="utf-8" + ).strip() + except Exception as ki_err: + logger.debug(f"Failed to load knowledge instructions: {ki_err}") + + # Prepend knowledge block BEFORE other instructions + # so the LLM sees it early and acts on it + effective_instructions = ( + f"{knowledge_block}\n\n{knowledge_instructions_text}\n\n{effective_instructions}" + if effective_instructions + else f"{knowledge_block}\n\n{knowledge_instructions_text}" + ) + 
logger.info(f"Knowledge awareness injected: {len(knowledge_block)} chars") + except Exception as e: + logger.debug(f"Knowledge awareness injection skipped: {e}") + if lc_bind_tools_meta is not None: + lc_bind_tools_meta["_lc_bind_tools_overlay_structured_tools"] = [ + t for t in (tools_for_prompt or []) if getattr(t, "name", None) + ] + + # Create prompt dynamically + dynamic_prompt = self._static_prompt + + if not dynamic_prompt: + dynamic_prompt = create_mcp_prompt( + tools_for_prompt, + allow_user_clarification=True, + return_to_user_cases=None, + instructions=effective_instructions, + apps=apps_for_prompt, + task_loaded_from_file=task_loaded_from_file, + is_autonomous_subtask=settings.advanced_features.force_autonomous_mode + or is_autonomous_subtask, + prompt_template=self._prompt_template, + enable_find_tools=enable_find_tools, + enable_todos=enable_todos, + special_instructions=special_instructions_final, + skills_enabled=skills_enabled, + skills_prompt_section=skills_prompt_section, + enable_shell_tool=getattr(settings.advanced_features, "enable_shell_tool", False), + has_knowledge=has_knowledge_tools, + few_shot_examples=few_shot_examples, + few_shots_enabled=few_shots_enabled, + ) + logger.info( + "Prepared CugaLite prompt: enable_find_tools={} few_shot_message_turns={} " + "few_shots_as_messages={} prompt_chars={}", + enable_find_tools, + len(few_shot_examples), + bool(few_shot_examples), + len(dynamic_prompt), + ) + else: + logger.info( + "Using static CugaLite prompt; dynamic few-shot injection skipped " + "(enable_find_tools={} few_shot_turns={})", + enable_find_tools, + len(few_shot_examples), + ) + + reflection_apps_snapshot = format_apps_for_prompt(apps_for_prompt or []) + + return Command( + goto="call_model", + update={ + "tools_prepared": True, + "prepared_prompt": dynamic_prompt, + "step_count": 0, + "cuga_lite_metadata": state.cuga_lite_metadata, + "reflection_apps": reflection_apps_snapshot, + "reflection_enable_find_tools": 
enable_find_tools, + "reflection_skills_enabled": skills_enabled, + "reflection_skills_prompt_section": skills_prompt_section, + "mcp_few_shot_messages": few_shot_examples, + }, + ) + + return prepare_tools_and_apps + + def build_sandbox_node(self, base_thread_id: Any, base_apps_list: Any): + """Return the sandbox async node.""" + + async def sandbox(state: Any, config: Optional[RunnableConfig] = None): + """Execute code in sandbox and return results.""" + from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import ToolCallTracker + + # Check if user denied approval (only if policies are enabled) + if settings.policy.enabled: + denial_command = ToolApprovalHandler.handle_denial(self, state) + if denial_command: + return denial_command + + configurable = config.get("configurable", {}) if config else {} + max_steps = ( + configurable.get("cuga_lite_max_steps") if "cuga_lite_max_steps" in configurable else None + ) + if "thread_id" in configurable: + current_thread_id = configurable["thread_id"] + else: + current_thread_id = state.thread_id or base_thread_id + current_apps_list = configurable.get("apps_list", base_apps_list) + track_tool_calls = configurable.get("track_tool_calls", False) + reflection_enabled = ( + configurable.get("reflection_enabled") + if "reflection_enabled" in configurable + else settings.advanced_features.reflection_enabled + ) + + # Get existing variables using CugaLiteState's own variables_manager + existing_vars = {} + for var_name in list(state.variables_manager.get_variable_names()): + var_value = state.variables_manager.get_variable(var_name) + if is_find_tools_listing_markdown(var_value): + state.variables_manager.remove_variable(var_name) + continue + existing_vars[var_name] = var_value + + # Add tools to context + context = {**existing_vars, **self._tools_context} + + # Start tool call tracking (only if enabled via invoke parameter) + ToolCallTracker.start_tracking(enabled=track_tool_calls) + + try: + # Execute the script - 
pass the CugaLiteState itself since it has variables_manager + _exec_plan = ExecutionRouter.resolve(settings) + if _exec_plan.split_execution_active: + logger.info( + "Split execution: python=%s shell=%s fs=%s", + _exec_plan.python_backend, + _exec_plan.shell_backend, + _exec_plan.filesystem_backend, + ) + output, new_vars = await CodeExecutor.eval_with_tools_async( + code=state.script, + _locals=context, + state=state, # Pass CugaLiteState - it has variables_manager property + thread_id=current_thread_id, + apps_list=current_apps_list, + plan=_exec_plan, + ) + + self._tracker.collect_step(step=Step(name="User_output", data=output)) + self._tracker.collect_step( + step=Step( + name="User_output_variables", + data=json.dumps( + new_vars, + default=lambda o: o.model_dump() if hasattr(o, "model_dump") else str(o), + ), + ) + ) + + # Output is already formatted and trimmed by code_executor + logger.debug(f"\n\n------\n\n📝 Execution output:\n\n{output}\n\n------\n\n") + + # Update variables using CugaLiteState's variables_manager + # This automatically updates state.variables_storage + for name, value in new_vars.items(): + if is_find_tools_listing_markdown(value): + continue + state.variables_manager.add_variable( + value, name=name, description="Created during code execution" + ) + + reflection_output = "" + if reflection_enabled: + try: + active_model = configurable.get("llm") or _llm_manager.get_model( + settings.agent.planner.model + ) + reflection_agent = reflection_task(llm=active_model) + # Format chat messages as history string + agent_history_parts = [] + for msg in state.chat_messages: + if isinstance(msg, HumanMessage): + agent_history_parts.append(f"User: {msg.content}") + elif isinstance(msg, AIMessage): + agent_history_parts.append(f"Assistant: {msg.content}") + else: + agent_history_parts.append( + f"{type(msg).__name__}: {getattr(msg, 'content', str(msg))}" + ) + agent_history = ( + "\n".join(agent_history_parts) + if agent_history_parts + else "No 
previous conversation history" + ) + reflection_result = await reflection_agent.ainvoke( + { + "instructions": "", + "current_task": _reflection_current_task(state) or "(no task text)", + "agent_history": agent_history, + "coder_agent_output": output, + "apps": state.reflection_apps or [], + "enable_find_tools": state.reflection_enable_find_tools, + "skills_enabled": state.reflection_skills_enabled, + "skills_prompt_section": state.reflection_skills_prompt_section, + "force_autonomous_mode": settings.advanced_features.force_autonomous_mode, + } + ) + reflection_output = reflection_result.content + logger.debug(f"Reflection output:\n{reflection_output}") + except Exception as e: + logger.warning(f"Reflection failed: {e}") + reflection_output = "" + + # Output is already formatted by code_executor + execution_message_content = execution_output_text(output) + if reflection_output: + execution_message_content = ( + f"{execution_message_content}\n\n---\n\nSummary:\n{reflection_output}" + ) + + self._tracker.collect_step( + step=Step( + name="User_return", + data=execution_message_content, + ) + ) + + new_message = HumanMessage(content=execution_message_content) + updated_messages, error_message = _core_append_with_step_limit( + self, state, [new_message], max_steps + ) + + # Collect tool calls from this execution + execution_tool_calls = ToolCallTracker.stop_tracking() + accumulated_tool_calls = (state.tool_calls or []) + execution_tool_calls + + if error_message: + return _core_create_error_command( + self, + updated_messages, + error_message, + state.step_count, + additional_updates={ + "variables_storage": state.variables_storage, + "variable_counter_state": state.variable_counter_state, + "variable_creation_order": state.variable_creation_order, + "tool_calls": accumulated_tool_calls, + }, + ) + + todo_state_update = extract_task_todos_from_new_vars(new_vars) + base_update = { + "chat_messages": updated_messages, + "variables_storage": state.variables_storage, + 
"variable_counter_state": state.variable_counter_state, + "variable_creation_order": state.variable_creation_order, + "step_count": state.step_count + 1, + "tool_calls": accumulated_tool_calls, + } + if todo_state_update is not None: + base_update["task_todos"] = todo_state_update + return base_update + except Exception as e: + # Collect tool calls even on error + execution_tool_calls = ToolCallTracker.stop_tracking() + accumulated_tool_calls = (state.tool_calls or []) + execution_tool_calls + + error_msg = f"Error during execution: {str(e)}" + logger.error(error_msg) + new_message = HumanMessage(content=error_msg) + updated_messages, limit_error_message = _core_append_with_step_limit( + self, state, [new_message], max_steps + ) + + if limit_error_message: + return _core_create_error_command( + self, updated_messages, limit_error_message, state.step_count + ) + + return { + "chat_messages": updated_messages, + "error": error_msg, + "execution_complete": True, + "step_count": state.step_count + 1, + "tool_calls": accumulated_tool_calls, + } + + return sandbox diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py index 717b983e..2554e2f7 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py @@ -1,566 +1,41 @@ -""" -CugaLite LangGraph - Unified subgraph combining CugaAgent and CodeAct - -TODO: Multi-user, multi-model, multi-tools dependency injection refactoring ----------------------------------------------------------------------------- -CURRENT STATE: Supports multi-user but with same configuration (shared model, tools, memory backend) -GOAL: Enable per-user configuration with isolated models, tools, and memory - -This class needs architectural changes to support multi-user, multi-model, and multi-tools scenarios: - -1. 
Multi-Tools Client Dependency Injection: - - Replace direct tool_provider parameter with injectable tools_client per user_id - - Use LangGraph's configurable pattern to inject tools_client at runtime - - Store tools_client in graph config/state rather than closure - - Enable per-user tool access control and isolation - - Support dynamic tool loading/unloading per user session - - Allow different tool sets per user based on permissions/subscriptions - -2. Multi-Model Support: - - Replace hardcoded model parameter with injectable model per user/session - - Support per-user model selection (different LLMs for different users) - - Enable model switching mid-conversation via configurable - - Inject llm_manager or model_client rather than direct model instance - - Support model-specific configurations (temperature, max_tokens, etc.) per user - -3. Multi-User Memory/Storage: - - Implement user_id-scoped memory storage (variables_storage, chat_messages) - - Use injectable memory backend (e.g., per-user checkpointer) - - Migrate from global state to user-scoped state partitioning - - Consider LangGraph's built-in checkpointing with user_id as partition key - - Isolate conversation history and variables per user - -4. LangGraph Configurable Dependencies: - - Leverage config["configurable"] for runtime dependency injection - - Pass user_id, tools_client, model_client, memory_backend via configurable - - Remove hardcoded global instances (tracker, llm_manager) - - Make all external dependencies (LLM, tools, memory) injectable - - Support per-request configuration overrides - -5. State Isolation: - - Ensure CugaLiteState is scoped per user session - - Add user_id field to state for tracking and isolation - - Implement proper state cleanup between user sessions - - Consider multi-tenancy patterns for shared resources - - Track model_id and tools_version in state for debugging +"""CugaLite LangGraph — thin wiring module. -6. 
Thread Safety: - - Ensure thread-safe access to user-scoped resources - - Avoid shared mutable state between users - - Use proper async/await patterns for concurrent user requests - - Handle concurrent model/tool requests from different users +State class, loop adapter, and the ``create_cuga_lite_graph`` factory. +All node logic lives in ``AgentGraphAdapter``. """ -import os -import json from pathlib import Path -from typing import Any, Optional, Sequence, Dict, List, Tuple, Set -from loguru import logger -from pydantic import BaseModel, Field - +from typing import Any, Dict, List, Optional, Tuple -from langchain_core.language_models import BaseChatModel -from langchain_core.tools import StructuredTool -from langchain_core.runnables import RunnableConfig from langchain_core.callbacks import BaseCallbackHandler -from langchain_core.messages import BaseMessage, AIMessage, HumanMessage - +from langchain_core.language_models import BaseChatModel +from langchain_core.messages import AIMessage, BaseMessage from langgraph.graph import StateGraph from langgraph.types import Command +from pydantic import BaseModel, Field -from cuga.backend.cuga_graph.nodes.task_decomposition_planning.analyze_task import TaskAnalyzer -from cuga.backend.activity_tracker.tracker import ActivityTracker, Step -from cuga.backend.llm.models import LLMManager -from cuga.backend.cuga_graph.state.agent_state import AgentState -from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import ( - create_mcp_prompt, - format_apps_for_prompt, - normalize_mcp_few_shot_examples, - PromptUtils, - resolve_cuga_lite_few_shots_enabled, -) -from cuga.backend.cuga_graph.nodes.cuga_lite.executors.code_executor import ( - CodeExecutor, - is_find_tools_listing_markdown, -) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import ( - make_tool_awaitable, -) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.runtime_tools import ( - build_runtime_tools, - resolve_runtime_backends, -) +from 
cuga.backend.activity_tracker.tracker import ActivityTracker from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( CoreGraphAdapter, append_chat_messages_with_step_limit as _core_append_with_step_limit, create_error_command as _core_create_error_command, - execution_output_text, ) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_graph import build_agent_graph from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes import ( create_call_model_node as _create_shared_call_model_node, ) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_graph import build_agent_graph from cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter import AgentGraphAdapter -from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ( - ExecutionRouter, - split_execution_note, -) -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ( - ToolProviderInterface, -) -from cuga.backend.cuga_graph.nodes.cuga_lite.model_runtime_profile import ( - resolved_runtime_model_name, - resolve_bind_tools_fields, -) -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler -from cuga.backend.cuga_graph.policy.enactment import PolicyEnactment +from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.state.agent_state import AgentState +from cuga.backend.llm.models import LLMManager +from cuga.backend.llm.utils.helpers import load_one_prompt from cuga.config import settings from cuga.configurations.instructions_manager import get_all_instructions_formatted -from cuga.backend.llm.utils.helpers import load_one_prompt -from cuga.backend.cuga_graph.nodes.cuga_lite.reflection.reflection import reflection_task - -from cuga.backend.skills import ( - SkillRegistry, - create_skill_tools, - discover_skills, - format_available_skills_block, -) - - -try: - from langfuse.langchain import CallbackHandler as LangfuseCallbackHandler 
-except ImportError: - try: - from langfuse.callback.langchain import LangchainCallbackHandler as LangfuseCallbackHandler - except ImportError: - LangfuseCallbackHandler = None - tracker = ActivityTracker() llm_manager = LLMManager() -def _tool_call_kwarg_literal(value: Any) -> str: - """Python expression for values reconstructed from JSON tool-call arguments.""" - if isinstance(value, str): - return json.dumps(value, ensure_ascii=False) - return repr(value) - - -def _extract_code_from_response_tool_calls(response: object) -> str | None: - """Recover fenced Python from AIMessage.tool_calls when content is empty (proxy/native FC).""" - tool_calls: list | None = getattr(response, "tool_calls", None) - if not tool_calls: - tool_calls = (getattr(response, "additional_kwargs", {}) or {}).get("tool_calls") - if not tool_calls: - return None - - tc = tool_calls[0] - if not isinstance(tc, dict): - return None - - name: str | None = tc.get("name") or (tc.get("function") or {}).get("name") - args: dict | str = tc.get("args") or (tc.get("function") or {}).get("arguments") or {} - if isinstance(args, str): - try: - args = json.loads(args) - except json.JSONDecodeError: - args = {} - - if not name: - return None - - args_str = ", ".join( - f"{k}={_tool_call_kwarg_literal(v)}" for k, v in (args if isinstance(args, dict) else {}).items() - ) - logger.debug("Recovered tool call '%s' from tool_calls field", name) - return f"```python\nresult = await {name}({args_str})\nprint(result)\n```" - - -def _bind_tools_mode_from_settings() -> str: - try: - m = getattr(settings.advanced_features, "cuga_lite_bind_tools_mode", None) - if m is not None and str(m).strip(): - return str(m).strip().lower() - except Exception: - pass - return "none" - - -def _bind_tools_apps_from_settings(): - try: - raw = getattr(settings.advanced_features, "cuga_lite_bind_tools_apps", None) - if raw is None: - return [] - if isinstance(raw, str): - return [raw.strip()] if raw.strip() else [] - if isinstance(raw, 
(list, tuple)): - return [str(x).strip() for x in raw if str(x).strip()] - except Exception: - pass - return [] - - -def _bind_tools_tool_names_from_settings(): - try: - raw = getattr(settings.advanced_features, "cuga_lite_bind_tools_tool_names", None) - if raw is None: - return [] - if isinstance(raw, str): - return [raw.strip()] if raw.strip() else [] - if isinstance(raw, (list, tuple)): - return [str(x).strip() for x in raw if str(x).strip()] - except Exception: - pass - return [] - - -def _bind_include_find_tools_from_config(cfg: Dict[str, Any]) -> bool: - v = cfg.get("cuga_lite_bind_tools_include_find_tools") - if v is None: - try: - v = getattr(settings.advanced_features, "cuga_lite_bind_tools_include_find_tools", False) - except Exception: - v = False - if isinstance(v, bool): - return v - if isinstance(v, str): - return v.strip().lower() in ("true", "1", "yes", "on") - return bool(v) - - -def _merge_find_tools_into_bound( - bound: List[StructuredTool], - seen: Set[str], - *, - include_find_tools: bool, - tools_context_ref: Optional[Dict[str, Any]], -) -> None: - if not include_find_tools: - return - ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if not ft: - return - name = getattr(ft, "name", None) or "" - if name and name not in seen: - seen.add(name) - bound.append(ft) - - -async def _indexed_provider_tools_first_wins( - tool_provider: ToolProviderInterface, -) -> Dict[str, StructuredTool]: - """Map tool name → StructuredTool using provider.get_all_tools (first occurrence wins).""" - try: - all_tools = await tool_provider.get_all_tools() - except Exception as e: - logger.warning("bind_tools: get_all_tools failed: %s", e) - return {} - by_name: Dict[str, StructuredTool] = {} - duplicates: Set[str] = set() - for t in all_tools or []: - n = getattr(t, "name", None) or "" - if not n: - continue - if n in by_name: - duplicates.add(n) - continue - by_name[n] = t - if duplicates: - logger.debug( - "bind_tools: duplicate tool names from 
provider (using first): %s", - sorted(duplicates), - ) - return by_name - - -async def _indexed_tools_for_native_bind( - tool_provider: ToolProviderInterface, - tools_context_ref: Optional[Dict[str, Any]], -) -> Dict[str, StructuredTool]: - """Registry MCP tools plus in-graph overlays (skills, OpenSandbox shell, todos, find_tools). - - ``run_command`` / ``write_file`` / etc. are not registered with ToolRegistryProvider; prepare - copies them onto ``tools_for_prompt`` only. Overlay must merge so ``cuga_lite_bind_tools_tool_names`` - can bind them by name. - """ - by_name = await _indexed_provider_tools_first_wins(tool_provider) - overlay = (tools_context_ref or {}).get("_lc_bind_tools_overlay_structured_tools") or [] - if not overlay: - return by_name - for t in overlay: - n = getattr(t, "name", None) or "" - if not n: - continue - by_name[n] = t - return by_name - - -async def resolve_model_with_bind_tools( - active_model: BaseChatModel, - *, - configurable: Optional[Dict[str, Any]], - tools_context_ref: Optional[Dict[str, Any]], - tool_provider: Optional[ToolProviderInterface], - model_name: Optional[str] = None, -) -> BaseChatModel: - """Optionally wrap ``active_model`` with ``bind_tools`` for native tool-calling tests. - - LangGraph ``config['configurable']`` overrides per-model runtime profile overrides TOML: - - - ``cuga_lite_bind_tools_mode``: ``none`` | ``find_tools`` | ``all`` | ``apps`` | ``tools`` | ``apps_and_tools`` - - ``cuga_lite_bind_tools_apps``: list of app names (``mode=apps`` or ``apps_and_tools``) - - ``cuga_lite_bind_tools_tool_names``: StructuredTool ``name`` values (``mode=tools`` or ``apps_and_tools``) - - ``cuga_lite_bind_tools_include_find_tools``: merge ``find_tools`` into ``all`` / ``apps`` / ``tools`` / ``apps_and_tools`` - - Profile ``gpt-oss-20b``: see ``model_runtime_profile.GPT_OSS_20B_RUNTIME_DEFAULTS``. 
- """ - cfg = configurable or {} - mn = (model_name or "").strip() - if not mn: - mn = resolved_runtime_model_name( - configurable_llm=cfg.get("llm"), - graph_default_model=active_model, - ) - mode, app_names, tool_names, include_find_tools = resolve_bind_tools_fields( - configurable, - mn, - settings_mode_fn=_bind_tools_mode_from_settings, - settings_apps_fn=_bind_tools_apps_from_settings, - settings_tool_names_fn=_bind_tools_tool_names_from_settings, - settings_include_fn=lambda: _bind_include_find_tools_from_config({}), - ) - - if mode in ("", "none", "false", "0", "off"): - if include_find_tools: - ft_only = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if ft_only: - return active_model.bind_tools([ft_only]) - return active_model - - try: - if mode == "find_tools": - ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if ft: - return active_model.bind_tools([ft]) - logger.debug( - "cuga_lite_bind_tools_mode=find_tools but find_tools StructuredTool is missing " - "(shortlisting may be off)" - ) - return active_model - - if mode == "all": - if not tool_provider: - logger.warning("cuga_lite_bind_tools_mode=all but tool_provider is missing") - return active_model - by_name = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) - bound = list(by_name.values()) - seen: Set[str] = {n for n in by_name} - _merge_find_tools_into_bound( - bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref - ) - if not bound: - return active_model - return active_model.bind_tools(bound) - - if mode == "apps_and_tools": - if not tool_provider: - logger.warning("cuga_lite_bind_tools_mode=apps_and_tools but tool_provider is missing") - return active_model - if not app_names and not tool_names: - if include_find_tools: - ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if ft: - return active_model.bind_tools([ft]) - logger.warning( - "cuga_lite_bind_tools_mode=apps_and_tools but 
cuga_lite_bind_tools_apps and " - "cuga_lite_bind_tools_tool_names are both empty " - "(set include_find_tools to bind find_tools only)" - ) - return active_model - - bound: List[StructuredTool] = [] - seen_names: Set[str] = set() - for app_name in app_names: - try: - for t in await tool_provider.get_tools(app_name): - name = getattr(t, "name", None) or "" - if name and name not in seen_names: - seen_names.add(name) - bound.append(t) - except Exception as e: - logger.warning("bind_tools apps_and_tools: get_tools(%s) failed: %s", app_name, e) - - by_name_lookup: Dict[str, StructuredTool] = {} - if tool_names: - by_name_lookup = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) - - missing: List[str] = [] - if tool_names: - for tn in tool_names: - if tn in seen_names: - continue - t = by_name_lookup.get(tn) - if t is None: - missing.append(tn) - continue - seen_names.add(tn) - bound.append(t) - if missing: - logger.warning( - "cuga_lite_bind_tools_tool_names not found among provider tools (skipped): %s", - missing, - ) - - _merge_find_tools_into_bound( - bound, seen_names, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref - ) - if not bound: - return active_model - return active_model.bind_tools(bound) - - if mode == "apps": - if not app_names: - if include_find_tools: - ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if ft: - return active_model.bind_tools([ft]) - logger.warning( - "cuga_lite_bind_tools_mode=apps but cuga_lite_bind_tools_apps is empty " - "(set include_find_tools to bind find_tools only)" - ) - return active_model - if not tool_provider: - logger.warning("cuga_lite_bind_tools_mode=apps but tool_provider is missing") - return active_model - - bound = [] - seen: Set[str] = set() - for app_name in app_names: - try: - for t in await tool_provider.get_tools(app_name): - name = getattr(t, "name", None) or "" - if name and name not in seen: - seen.add(name) - bound.append(t) - except Exception 
as e: - logger.warning("bind_tools apps: get_tools(%s) failed: %s", app_name, e) - _merge_find_tools_into_bound( - bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref - ) - if not bound: - return active_model - return active_model.bind_tools(bound) - - if mode == "tools": - if not tool_names: - if include_find_tools: - ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if ft: - return active_model.bind_tools([ft]) - logger.warning( - "cuga_lite_bind_tools_mode=tools but cuga_lite_bind_tools_tool_names is empty " - "(set include_find_tools to bind find_tools only)" - ) - return active_model - if not tool_provider: - logger.warning("cuga_lite_bind_tools_mode=tools but tool_provider is missing") - return active_model - by_name = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) - if not by_name: - return active_model - bound = [] - seen: Set[str] = set() - missing: List[str] = [] - for tn in tool_names: - t = by_name.get(tn) - if t is None: - missing.append(tn) - continue - if tn not in seen: - seen.add(tn) - bound.append(t) - if missing: - logger.warning( - "cuga_lite_bind_tools_tool_names not found among provider tools (skipped): %s", - missing, - ) - _merge_find_tools_into_bound( - bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref - ) - if not bound: - return active_model - return active_model.bind_tools(bound) - - logger.warning( - "Unknown cuga_lite_bind_tools_mode: %s (use none|find_tools|all|apps|tools|apps_and_tools)", - mode, - ) - except Exception as e: - logger.warning("resolve_model_with_bind_tools failed: %s", e) - return active_model - - -def _clean_empty_response_retry_meta(meta: Optional[Dict[str, Any]]) -> Dict[str, Any]: - m = {**(meta or {})} - m.pop("_empty_response_correction", None) - return m - - -def _get_knowledge_tool_scope_context( - engine: Any | None, - thread_id: str | None, -) -> tuple[tuple[str, ...], str | None]: - config = 
getattr(engine, "_config", None) if engine else None - if not config or not getattr(config, "enabled", False): - return (), None - - scopes: list[str] = [] - if getattr(config, "agent_level_enabled", True): - scopes.append("agent") - if getattr(config, "session_level_enabled", True) and thread_id: - scopes.append("session") - - default_scope = "agent" if "agent" in scopes else scopes[0] if scopes else None - return tuple(scopes), default_scope - - -def _knowledge_scope_instruction(allowed_scopes: tuple[str, ...], thread_id: str | None) -> str: - if allowed_scopes == ("agent",): - return ( - "Knowledge scope rules for this run: only agent-level knowledge is available. " - "Never call `knowledge_*` tools with `scope=\"session\"`." - ) - if allowed_scopes == ("session",): - return ( - "Knowledge scope rules for this run: only session-level knowledge is available. " - "Never call `knowledge_*` tools with `scope=\"agent\"`. The conversation thread context is injected automatically." - ) - if allowed_scopes == ("agent", "session"): - return ( - "Knowledge scope rules for this run: both knowledge scopes are available. " - "Use `scope=\"agent\"` for permanent agent documents and `scope=\"session\"` for this conversation's documents." - ) - if thread_id: - return "Knowledge tools are unavailable in this run. Do not call any `knowledge_*` tool." - return ( - "Knowledge tools are unavailable in this run. " - "Session scope cannot be used here because there is no conversation thread context." - ) - - -def _decorate_knowledge_tool(tool: Any, allowed_scopes: tuple[str, ...], thread_id: str | None) -> None: - """Add a brief scope hint to the tool description. - - The full scope rules are already in the system instructions, so we only - add a short reminder here to avoid bloating the prompt with repeated text. 
- """ - base_description = getattr(tool, "description", "") or "Knowledge tool" - scopes_str = ", ".join(f'"{s}"' for s in allowed_scopes) - hint = f"Allowed scopes: {scopes_str}. See knowledge scope rules in instructions above." - tool.description = f"{base_description}\n\n{hint}".strip() - - class CugaLiteState(BaseModel): """State for CugaLite subgraph. @@ -644,41 +119,6 @@ def variables_manager(self): return StateVariablesManager(self) -def format_task_todos_system_block(todos: List[Dict[str, str]]) -> str: - """Append to system prompt so the model always sees the live todo list (not execution variables).""" - if not todos: - return "" - lines = [ - "", - "---", - "", - "## Current task todos", - "", - "Execution only prints **Todos updated** after each change; use this list as the source of truth.", - "", - ] - for i, item in enumerate(todos, start=1): - status = item.get("status", "pending") - text = item.get("text", "") - lines.append(f"{i}. **[{status}]** {text}") - lines.append("") - return "\n".join(lines) - - -def _reflection_current_task(state: CugaLiteState) -> str: - """Prefer ``sub_task``; else last user message that is not sandbox ``Execution output`` feedback.""" - if (state.sub_task or "").strip(): - return state.sub_task.strip() - if state.chat_messages: - execution_prefix = "Execution output:" - for msg in reversed(state.chat_messages): - if isinstance(msg, HumanMessage): - c = (msg.content or "").strip() - if c and not c.startswith(execution_prefix): - return c - return "" - - class _CugaLiteLoopAdapter(CoreGraphAdapter): """Lite seam: messages live on ``chat_messages``; step limit from ``configurable`` override else ``settings.advanced_features``.""" @@ -716,286 +156,6 @@ def create_error_command( ) -class Todo(BaseModel): - """A single todo item with text and status.""" - - text: str = Field(..., description="The task description") - status: str = Field( - default="pending", - description="Status of the todo: 'pending', 'in_progress', or 
'completed'", - ) - - -class TodosInput(BaseModel): - """Input schema for create_update_todos function.""" - - todos: List[Todo] = Field(..., description="List of todos, each with 'text' and 'status' fields") - - -class TodosOutput(BaseModel): - """Output schema for create_update_todos function.""" - - todos: List[Todo] = Field(..., description="List of todos with their current status") - - -def _try_parse_todos_payload(value: Any) -> Optional[List[Dict[str, Any]]]: - if not isinstance(value, dict) or "todos" not in value: - return None - raw = value["todos"] - if not isinstance(raw, list): - return None - if not raw: - return [] - if not all(isinstance(x, dict) and "text" in x and "status" in x for x in raw): - return None - return raw - - -def extract_task_todos_from_new_vars(new_vars: dict[str, Any]) -> Optional[List[Dict[str, Any]]]: - for val in new_vars.values(): - parsed = _try_parse_todos_payload(val) - if parsed is not None: - return parsed - return None - - -def format_current_plan_section(task_todos: List[Dict[str, Any]]) -> str: - lines = ["## Current Plan", ""] - for item in task_todos: - text = str(item.get("text", "")).strip() - status = str(item.get("status", "pending")).strip() - lines.append(f"- **[{status}]** {text}") - return "\n".join(lines) + "\n" - - -def _first_user_message_text(chat_messages: Optional[List[BaseMessage]]) -> Optional[str]: - if not chat_messages: - return None - for msg in chat_messages: - if isinstance(msg, HumanMessage): - raw = msg.content - text = raw.strip() if isinstance(raw, str) else str(raw).strip() - return text or None - return None - - -def _compose_find_tools_shortlister_query(query: str, initial_user_message: Optional[str]) -> str: - q = query.strip() - init = (initial_user_message or "").strip() - if not init: - return q - return f"Query: {q}\nTask context (initial user message): {init}" - - -def _web_search_enabled() -> bool: - return bool(getattr(settings.advanced_features, "enable_web_search", False)) - - 
-def _ensure_web_app(apps: List[Any], all_apps: List[Any]) -> List[Any]: - if not _web_search_enabled() or any(getattr(app, "name", None) == "web" for app in apps): - return apps - web_app = next((app for app in all_apps if getattr(app, "name", None) == "web"), None) - if web_app: - return [*apps, web_app] - return apps - - -async def create_find_tools_tool( - all_tools: Sequence[StructuredTool], - all_apps: List[Any], - app_to_tools_map: Optional[Dict[str, List[StructuredTool]]] = None, - llm: Optional[Any] = None, - initial_user_message: Optional[str] = None, -) -> StructuredTool: - """Create a find_tools StructuredTool for tool discovery. - - Args: - all_tools: All available tools to search through - all_apps: All available app definitions - app_to_tools_map: Optional mapping of app_name -> list of tools. If provided, used for filtering by app_name. - initial_user_message: First human message in the session; combined with the tool `query` for shortlisting. - - Returns: - StructuredTool configured for finding relevant tools - """ - - async def find_tools_func(query: str, app_name: str): - """Search for relevant tools from the connected applications based on a natural language query. - - Args: - query: Natural language query describing what tools are needed to accomplish the task can include also which parameters are needed or the output expected - app_name: Name of a specific app to filter tools from. Only searches tools from that app. - - Returns: - Top 4 matching tools with their details - """ - if app_to_tools_map and app_name in app_to_tools_map: - filtered_tools = app_to_tools_map[app_name] - else: - logger.warning( - f"App '{app_name}' not found in app_to_tools_map. Available apps: {list(app_to_tools_map.keys()) if app_to_tools_map else 'N/A'}" - ) - filtered_tools = [] - - filtered_apps = [app for app in all_apps if hasattr(app, 'name') and app.name == app_name] - - if not filtered_apps: - logger.warning( - f"App '{app_name}' not found in available apps. 
Available apps: {[app.name if hasattr(app, 'name') else str(app) for app in all_apps]}" - ) - - from langchain_core.exceptions import OutputParserException - - shortlister_query = _compose_find_tools_shortlister_query(query, initial_user_message) - - try: - return await PromptUtils.find_tools( - query=shortlister_query, all_tools=filtered_tools, all_apps=filtered_apps, llm=llm - ) - except OutputParserException as e: - logger.bind( - query_len=len(shortlister_query), - error_type=type(e).__name__, - ).opt(exception=True).warning( - "Tool shortlisting failed due to parser error; returning error to agent" - ) - return ( - f"Tool shortlisting failed due to malformed response: {e}. " - "Please retry with a different query." - ) - except Exception as e: - logger.bind( - query_len=len(shortlister_query), - error_type=type(e).__name__, - ).opt(exception=True).warning("Tool shortlisting failed unexpectedly; returning error to agent") - return ( - f"Tool shortlisting failed due to an internal error: {e}. " - "Please retry with a different query." - ) - - return StructuredTool.from_function( - func=find_tools_func, - name="find_tools", - description="Search for relevant tools from a specific connected application based on a natural language query. 
Use this when you need to discover what tools are available for a specific task within a specific application.", - ) - - -def _serialize_todos_for_store(todos_list: List[Any]) -> List[Dict[str, str]]: - out: List[Dict[str, str]] = [] - for t in todos_list: - if isinstance(t, Todo): - out.append({"text": t.text, "status": t.status}) - elif hasattr(t, "model_dump"): - d = t.model_dump() - out.append({"text": str(d.get("text", "")), "status": str(d.get("status", "pending"))}) - elif isinstance(t, dict): - out.append({"text": str(t.get("text", "")), "status": str(t.get("status", "pending"))}) - else: - out.append({"text": str(t), "status": "pending"}) - return out - - -async def create_update_todos_tool( - agent_state: Optional['AgentState'] = None, - todos_store_ref: Optional[List[Dict[str, str]]] = None, -) -> StructuredTool: - """Create a create_update_todos StructuredTool for managing task todos. - - Args: - agent_state: Optional AgentState (reserved for future use) - todos_store_ref: Mutable list shared with the graph; latest todos are written here for the system prompt. - - Returns: - StructuredTool configured for creating and updating todos - """ - - async def create_update_todos_func(todos: Any) -> TodosOutput: - """Create or update a list of todos for complex multi-step tasks. - - Use this tool when you have a complex task that requires multiple steps. - This helps you track progress and organize your work. - - Args: - todos: List of todo dicts/models (matches ``TodosInput.todos`` / tool schema). - - Returns: - Short confirmation only (full list is shown in the system prompt via todos_store_ref). 
- """ - input_data = todos - # Handle different input types - if isinstance(input_data, TodosInput): - todos_list = input_data.todos - elif isinstance(input_data, dict): - # If it's a dict, check if it has 'todos' key - if 'todos' in input_data: - todos_list = input_data['todos'] - else: - # If no 'todos' key, treat the whole dict as a single todo or wrap it - todos_list = [input_data] - # Convert dict items to Todo models - todos_list = [Todo(**todo) if isinstance(todo, dict) else todo for todo in todos_list] - elif isinstance(input_data, list): - # If it's a list directly, convert each item to Todo - todos_list = [Todo(**todo) if isinstance(todo, dict) else todo for todo in input_data] - else: - # Fallback: try to create TodosInput - try: - if isinstance(input_data, dict): - input_data = TodosInput(**input_data) - else: - input_data = TodosInput(todos=input_data) - todos_list = input_data.todos - except Exception: - # Last resort: wrap in a list - todos_list = [Todo(**input_data) if isinstance(input_data, dict) else input_data] - - if todos_store_ref is not None: - serialized = _serialize_todos_for_store(todos_list) - todos_store_ref.clear() - todos_store_ref.extend(serialized) - - normalized = [t if isinstance(t, Todo) else Todo(**t) for t in todos_list] - return TodosOutput(todos=normalized) - - return StructuredTool.from_function( - func=create_update_todos_func, - name="create_update_todos", - description="Create or update a list of todos for complex multi-step tasks. Pass `todos` as a list of objects with 'text' and 'status' ('pending', 'in_progress', or 'completed'). 
Returns a todos payload; the full list is shown in the system prompt under 'Current task todos' (Current Plan).", - args_schema=TodosInput, - return_direct=False, - ) - - -_BUNDLED_FIND_TOOLS_FEW_SHOT_JSON = ( - Path(__file__).resolve().parent / "prompts" / "find_tools_few_shot_examples.json" -) - - -def _resolve_find_tools_few_shot_json_path() -> Optional[Path]: - if _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON.is_file(): - return _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON - return None - - -def _load_default_find_tools_few_shot_examples() -> List[Dict[str, str]]: - path = _resolve_find_tools_few_shot_json_path() - if path is None: - logger.debug( - "Find-tools few-shot JSON not found (expected packaged %s or repo samples copy); skipping", - _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON, - ) - return [] - try: - raw = json.loads(path.read_text(encoding="utf-8")) - normalized = normalize_mcp_few_shot_examples(raw) - if normalized: - logger.info(f"Loaded {len(normalized)} find_tools MCP few-shot turn(s) from {path}") - return normalized - except (OSError, json.JSONDecodeError) as e: - logger.warning(f"Could not load find_tools few-shot JSON from {path}: {e}") - return [] - - def create_cuga_lite_graph( model: BaseChatModel, prompt: Optional[str] = None, @@ -1012,858 +172,37 @@ def create_cuga_lite_graph( enable_todos and reflection_enabled are read from config["configurable"] at runtime. Fallback to settings.advanced_features when not provided. 
- - Args: - model: The language model to use - prompt: Optional static prompt (if None, will be created dynamically from state) - tool_provider: Tool provider interface for accessing tools - apps_list: List of app names for tool context - agent_state: Optional AgentState for variables management - thread_id: Thread ID for E2B sandbox caching - callbacks: Optional list of callback handlers - special_instructions: Optional special instructions to add to the prompt - - Returns: - StateGraph implementing the CugaLite architecture """ prompts_dir = Path(__file__).parent / "prompts" prompt_template = load_one_prompt(str(prompts_dir / "mcp_prompt.jinja2"), relative_to_caller=False) instructions = get_all_instructions_formatted() - def create_prepare_node( - base_tool_provider, - base_prompt_template, - base_instructions, - tools_context_dict, - base_special_instructions, - task_todos_ref: List[Dict[str, str]], - lc_bind_tools_meta: Dict[str, Any] = None, - ): - """Factory to create prepare node with closure over tool provider and config.""" - - async def prepare_tools_and_apps( - state: CugaLiteState, config: Optional[RunnableConfig] = None - ) -> Command: - """Prepare tools, apps, and prompt once at the start of the graph. - - This node gets tools from tool_provider, filters based on state configuration, - determines if find_tools should be enabled, and prepares the prompt. - Tools are available via closure (per graph instance), prompt is stored in state. - - enable_todos is read from config["configurable"] at runtime. - - Optional configurable key ``mcp_few_shot_examples``: overrides few-shots—a JSON string or - list of dicts with ``role`` and ``content``. If absent (or explicitly ``None``) and - ``find_tools`` is enabled, ``prompts/find_tools_few_shot_examples.json`` (bundled next to the - MCP template) is loaded, with optional fallback to repo ``samples/cuga_lite/mcp_few_shot_examples.json``. 
- Bundled few-shots only apply when ``find_tools`` shortlisting is active - (``total_tool_count > shortlisting_tool_threshold``, see settings configurable). - - Disable few-shots entirely via ``advanced_features.cuga_lite_enable_few_shots`` in settings.toml - or ``cuga_lite_enable_few_shots`` in configurable (skips prefix chat few-shots). - """ - configurable = config.get("configurable", {}) if config else {} - enable_todos = ( - configurable.get("enable_todos") - if "enable_todos" in configurable - else settings.advanced_features.enable_todos - ) - shortlisting_threshold = ( - configurable.get("shortlisting_tool_threshold") - if "shortlisting_tool_threshold" in configurable - else settings.advanced_features.shortlisting_tool_threshold - ) - _runtime_model_name = resolved_runtime_model_name( - configurable_llm=configurable.get("llm"), - graph_default_model=model, - ) - few_shots_enabled = resolve_cuga_lite_few_shots_enabled( - configurable, - model_name=_runtime_model_name, - ) - logger.debug( - f"[APPROVAL DEBUG] prepare_tools_and_apps received cuga_lite_metadata: {state.cuga_lite_metadata}" - ) - - # Skip policy checking if policies are disabled or if we're returning from approval - if settings.policy.enabled and not ToolApprovalHandler.should_skip_policy_check( - _LITE_LOOP_ADAPTER, state - ): - # Check for policies and enact if matched - # Include IntentGuard, Playbook, and ToolGuide for intent checks - from cuga.backend.cuga_graph.policy.models import PolicyType - - command, metadata = await PolicyEnactment.check_and_enact( - state, - config, - policy_types=[PolicyType.INTENT_GUARD, PolicyType.PLAYBOOK, PolicyType.TOOL_GUIDE], - adapter=_LITE_LOOP_ADAPTER, - ) - - # If policy returned a command (e.g., BLOCK_INTENT), execute it immediately - if command: - return command - - # If policy returned metadata (e.g., playbook guidance), store it - if metadata: - _LITE_LOOP_ADAPTER.set_metadata(state, metadata) - elif not settings.policy.enabled: - logger.debug("Policy 
system disabled - skipping policy checks") - else: - logger.info("[APPROVAL DEBUG] Skipping policy check - user has already approved") - - if not base_tool_provider: - raise ValueError("tool_provider is required") - - # Get total tool count across ALL apps (for shortlisting threshold - not per app) - all_tools_total = await base_tool_provider.get_all_tools() - total_tool_count = len(all_tools_total) if all_tools_total else 0 - - # Get tools from provider - apps_for_prompt = None - app_to_tools_map = {} - - # Get apps from state and filter tools if specific app is selected - if state.sub_task_app: - # Specific app selected - filter tools to only this app - all_apps = await base_tool_provider.get_apps() - # add here the implementation of force_ - force_lite_apps = getattr(settings.advanced_features, 'force_lite_mode_apps', []) - if force_lite_apps: - allowed_apps_names = list(set([state.sub_task_app] + force_lite_apps)) - if _web_search_enabled(): - allowed_apps_names.append("web") - # call authenticate_apps for the allowed apps - if settings.advanced_features.benchmark == "appworld": - await TaskAnalyzer.call_authenticate_apps(force_lite_apps) - apps_for_prompt = [app for app in all_apps if app.name in allowed_apps_names] - else: - apps_for_prompt = [app for app in all_apps if app.name == state.sub_task_app] - apps_for_prompt = _ensure_web_app(apps_for_prompt, all_apps) - # Get only tools for this specific app - tools_for_execution = [] - for app in apps_for_prompt: - current_tools_for_execution = await base_tool_provider.get_tools(app.name) - app_to_tools_map[app.name] = current_tools_for_execution - tools_for_execution.extend(current_tools_for_execution) - - logger.info( - f"Filtered to {len(tools_for_execution)} tools for {len(apps_for_prompt)} identified apps" - ) - elif state.api_intent_relevant_apps: - # Filter to API apps - all_apps = await base_tool_provider.get_apps() - apps_for_prompt = [ - app - for app in state.api_intent_relevant_apps - if hasattr(app, 
'type') and app.type == 'api' - ] - apps_for_prompt = _ensure_web_app(apps_for_prompt, all_apps) - # Get tools only for the identified apps - tools_for_execution = [] - for app in apps_for_prompt: - app_tools = await base_tool_provider.get_tools(app.name) - app_to_tools_map[app.name] = app_tools - tools_for_execution.extend(app_tools) - logger.info( - f"Filtered to {len(tools_for_execution)} tools for {len(apps_for_prompt)} identified apps" - ) - else: - # Get all tools and apps - all_apps = await base_tool_provider.get_apps() - apps_for_prompt = all_apps - tools_for_execution = all_tools_total or [] - # Build mapping for all apps - for app in apps_for_prompt: - app_tools = await base_tool_provider.get_tools(app.name) - app_to_tools_map[app.name] = app_tools - - enable_find_tools = total_tool_count > shortlisting_threshold or _web_search_enabled() - - if enable_find_tools: - logger.info( - f"Auto-enabling find_tools: total {total_tool_count} tools (across all apps) exceeds threshold of {shortlisting_threshold}" - ) - - # Prepare prompt - is_autonomous_subtask = state.sub_task is not None and state.sub_task.strip() != "" - - # TODO: Add task loaded from file support this happens when we load file as playboook - task_loaded_from_file = False # Not used in current flow - - # Prepare tools for prompt - if find_tools enabled, only expose find_tools - tools_for_prompt = tools_for_execution - if enable_find_tools: - active_model = configurable.get("llm") - find_tool = await create_find_tools_tool( - all_tools=tools_for_execution, - all_apps=apps_for_prompt, - app_to_tools_map=app_to_tools_map, - llm=active_model, - initial_user_message=_first_user_message_text(state.chat_messages), - ) - tools_for_prompt = [find_tool] - # Add find_tools to tools context for sandbox execution - # Wrap to make awaitable (agent always uses await) - # Prefer coroutine over func to avoid run_in_executor issues - find_tool_func = ( - find_tool.coroutine - if hasattr(find_tool, 'coroutine') and 
find_tool.coroutine - else find_tool.func - ) - tools_context_dict['find_tools'] = make_tool_awaitable(find_tool_func) - if lc_bind_tools_meta is not None: - lc_bind_tools_meta["_lc_bind_tools_find_tools"] = find_tool - logger.info( - "Exposing only find_tools in prompt (all tools + find_tools available in execution context)" - ) - - if few_shots_enabled: - if "mcp_few_shot_examples" in configurable: - raw_fs = configurable["mcp_few_shot_examples"] - if raw_fs is not None: - few_shot_examples = normalize_mcp_few_shot_examples(raw_fs) - elif enable_find_tools: - few_shot_examples = _load_default_find_tools_few_shot_examples() - else: - few_shot_examples = [] - elif enable_find_tools: - few_shot_examples = _load_default_find_tools_few_shot_examples() - else: - few_shot_examples = [] - logger.debug( - "Bundled MCP few-shots (prompts/find_tools_few_shot_examples.json) not loaded: find_tools " - "is off " - f"(total_tool_count={total_tool_count} <= shortlisting_tool_threshold=" - f"{shortlisting_threshold}). Lower the threshold via configurable or add apps/tools." 
- ) - else: - few_shot_examples = [] - logger.debug("MCP few-shots disabled (cuga_lite_enable_few_shots=false)") - if few_shot_examples: - logger.debug(f"MCP few-shot examples: {len(few_shot_examples)} turns") - - # Add create_update_todos tool for complex task management if enabled - if enable_todos: - todos_tool = await create_update_todos_tool(agent_state=state, todos_store_ref=task_todos_ref) - tools_for_prompt.append(todos_tool) - # Add to tools context for sandbox execution - # Prefer coroutine over func to avoid run_in_executor issues - todos_tool_func = ( - todos_tool.coroutine - if hasattr(todos_tool, 'coroutine') and todos_tool.coroutine - else todos_tool.func - ) - tools_context_dict['create_update_todos'] = make_tool_awaitable(todos_tool_func) - - # Apply tool guide if guides exist in metadata and haven't been applied yet - # Guides should apply regardless of whether a playbook matched - if settings.policy.enabled and state.cuga_lite_metadata: - # Check if guides exist (either as separate guides list or legacy format) - has_guides = ( - state.cuga_lite_metadata.get("guides") - or state.cuga_lite_metadata.get("guide_content") - or state.cuga_lite_metadata.get("policy_type") == "tool_guide" - or state.cuga_lite_metadata.get("has_guides", False) - ) - - if has_guides: - tools_for_execution = PolicyEnactment.apply_tool_guide( - tools_for_execution, state.cuga_lite_metadata - ) - tools_for_prompt = PolicyEnactment.apply_tool_guide( - tools_for_prompt, state.cuga_lite_metadata - ) - # Mark guides as applied to prevent re-application - state.cuga_lite_metadata["guides_applied"] = True - logger.info("Applied tool guide from policy") - else: - logger.debug("No tool guides found in metadata") - - skill_tools = [] - skills_prompt_section = "" - skills_enabled = False - configurable_special = ( - (config or {}).get("configurable", {}).get("special_instructions") if config else None - ) - effective_special = base_special_instructions or configurable_special or "" - 
skills_cfg_on = getattr(settings.skills, "enabled", False) - cuga_folder_for_skills = os.getenv("CUGA_FOLDER", settings.policy.cuga_folder) - if skills_cfg_on: - skill_entries = discover_skills(cuga_folder_for_skills) - if skill_entries: - skill_registry = SkillRegistry(skill_entries) - skill_tools = create_skill_tools(skill_registry) - tools_for_prompt.extend(skill_tools) - skills_prompt_section = format_available_skills_block(skill_registry) - skills_enabled = True - logger.info( - f"Loaded {len(skill_entries)} agent skill(s) from .agents/skills and " - f"~/.config/agents/skills with legacy {cuga_folder_for_skills}/skills and " - "~/.config/cuga/skills fallbacks" - ) - - # Resolve thread_id early for per-thread workspace selection. - _cfg_for_thread = config.get("configurable", {}) if config else {} - _runtime_thread_id_for_fs = _cfg_for_thread.get("thread_id") or state.thread_id or thread_id - - # Update tools context with all execution tools. - # Wrap to make awaitable (agent always uses await). Filesystem path - # rewriting is no longer needed here — filesystem tools come from - # the consolidated runtime class below, not from MCP. 
- for tool in tools_for_execution: - # Extract tool function - StructuredTool may use .func, .coroutine, or ._run - # IMPORTANT: Prefer coroutine over func to avoid run_in_executor issues - # with tools that have async implementations (like MCP tools) - tool_func = None - if hasattr(tool, 'coroutine') and tool.coroutine: - # Prefer async coroutine - avoids run_in_executor timeout issues - tool_func = tool.coroutine - elif hasattr(tool, 'func') and tool.func: - tool_func = tool.func - else: - tool_func = getattr(tool, '_run', None) - - if tool_func: - tools_context_dict[tool.name] = make_tool_awaitable(tool_func) - else: - logger.warning(f"Tool '{tool.name}' has no callable function, skipping") - - for tool in skill_tools: - tool_func = None - if hasattr(tool, "coroutine") and tool.coroutine: - tool_func = tool.coroutine - elif hasattr(tool, "func") and tool.func: - tool_func = tool.func - else: - tool_func = getattr(tool, "_run", None) - if tool_func: - tools_context_dict[tool.name] = make_tool_awaitable(tool_func) - else: - logger.warning(f"Skill tool '{tool.name}' has no callable, skipping") - - # Inject the consolidated filesystem tools + run_command via the - # shared runtime_tools orchestrator. Backend selection and gating - # live in cuga_agent_core (behavior-identical to the previous - # inline block); filesystem and run_command remain independently - # gated by enable_filesystem_tools / enable_shell_tool. 
- _runtime_backends = resolve_runtime_backends(settings, configurable) - - if _runtime_backends.filesystem != "none" or _runtime_backends.shell != "none": - cfg = config.get("configurable", {}) if config else {} - runtime_thread_id = cfg["thread_id"] if "thread_id" in cfg else (state.thread_id or thread_id) - else: - runtime_thread_id = None - - _runtime_bundle = build_runtime_tools(thread_id=runtime_thread_id, backends=_runtime_backends) - tools_context_dict.update(_runtime_bundle.execution_callables) - tools_for_prompt.extend(_runtime_bundle.prompt_tools) - if _runtime_bundle.app_definitions and apps_for_prompt is not None: - apps_for_prompt = list(apps_for_prompt) + _runtime_bundle.app_definitions - - from cuga.backend.evolve.memory import build_evolve_special_instructions_extension - - special_instructions_final = effective_special or "" - _split_note = split_execution_note(ExecutionRouter.resolve(settings)) - if _split_note: - special_instructions_final = (special_instructions_final + "\n\n" + _split_note).strip() - evolve_extension = await build_evolve_special_instructions_extension( - state=state, - configurable=configurable, - timeout=settings.evolve.timeout, - ) - if evolve_extension: - special_instructions_final = (special_instructions_final or "") + evolve_extension - - cfg = config.get("configurable", {}) if config else {} - _thread_id = cfg.get("thread_id") or "" - _knowledge_engine = cfg.get("knowledge_engine") - if _knowledge_engine is None: - try: - from cuga.backend.server.main import app as _app - - _app_state = getattr(_app.state, "app_state", None) - _knowledge_engine = getattr(_app_state, "knowledge_engine", None) if _app_state else None - except Exception: - _knowledge_engine = None - - allowed_knowledge_scopes, default_knowledge_scope = _get_knowledge_tool_scope_context( - _knowledge_engine, - _thread_id or None, - ) - - knowledge_tool_names = { - tool.name - for tool in tools_for_execution - if getattr(tool, "name", 
"").startswith("knowledge_") - } - - if knowledge_tool_names and not allowed_knowledge_scopes: - tools_for_execution = [ - tool - for tool in tools_for_execution - if getattr(tool, "name", "") not in knowledge_tool_names - ] - tools_for_prompt = [ - tool for tool in tools_for_prompt if getattr(tool, "name", "") not in knowledge_tool_names - ] - apps_for_prompt = [ - app for app in (apps_for_prompt or []) if getattr(app, "name", "") != "knowledge" - ] - for tool_name in knowledge_tool_names: - tools_context_dict.pop(tool_name, None) - elif knowledge_tool_names: - if _thread_id: - logger.debug("Knowledge tools: thread context available for session scope injection") - - def _wrap_knowledge_tool(fn, tid, allowed_scopes, default_scope): - async def _wrapped(*args, **kwargs): - scope = kwargs.get("scope") - if scope is None and default_scope: - kwargs["scope"] = default_scope - scope = default_scope - if scope is not None and scope not in allowed_scopes: - allowed_text = ", ".join(allowed_scopes) - return { - "error": ( - f"Knowledge scope '{scope}' is unavailable in this context. " - f"Allowed scopes: {allowed_text}" - ) - } - if tid and "session" in allowed_scopes: - kwargs.setdefault("thread_id", tid) - return await fn(*args, **kwargs) - - _wrapped.__doc__ = getattr(fn, "__doc__", None) - _wrapped._knowledge_allowed_scopes = allowed_scopes - _wrapped._knowledge_default_scope = default_scope - _wrapped._knowledge_thread_id = tid - return _wrapped - - for tool_name in knowledge_tool_names: - original_fn = tools_context_dict.get(tool_name) - if original_fn: - tools_context_dict[tool_name] = _wrap_knowledge_tool( - original_fn, - _thread_id, - allowed_knowledge_scopes, - default_knowledge_scope, - ) - - # Note: scope rules are injected once via effective_instructions. - # No per-tool decoration needed — avoids repeated text in prompt. 
- - # Inject knowledge base awareness if knowledge tools are available - effective_instructions = base_instructions - # Detect knowledge tools — works for both registry (app named - # "knowledge") and SDK mode (tools under "runtime_tools") - has_knowledge_tools = any( - getattr(app, "name", "") == "knowledge" for app in (apps_for_prompt or []) - ) - if not has_knowledge_tools and tools_for_execution: - has_knowledge_tools = any( - getattr(t, "name", "").startswith("knowledge_") for t in tools_for_execution - ) - knowledge_scope_instruction = _knowledge_scope_instruction( - allowed_knowledge_scopes, - _thread_id or None, - ) - if knowledge_tool_names: - effective_instructions = ( - f"{knowledge_scope_instruction}\n\n{effective_instructions}" - if effective_instructions - else knowledge_scope_instruction - ) - if has_knowledge_tools: - try: - from cuga.backend.knowledge.awareness import ( - get_knowledge_summary, - format_knowledge_context, - get_engine_from_app_state, - ) - - cfg = config.get("configurable", {}) - engine = cfg.get("knowledge_engine") or get_engine_from_app_state() - # Get agent_id: configurable > app_state > fallback - agent_id = cfg.get("agent_id") - knowledge_config_hash = cfg.get("knowledge_config_hash") - if not agent_id: - try: - from cuga.backend.server.main import app as _app - - _as = getattr(_app.state, "app_state", None) - agent_id = getattr(_as, "agent_id", None) if _as else None - if knowledge_config_hash is None: - knowledge_config_hash = ( - getattr(_as, "knowledge_config_hash", None) if _as else None - ) - except Exception: - pass - if not agent_id: - agent_id = "cuga-default" - awareness_thread_id = cfg.get("thread_id") - kb_ctx = format_knowledge_context( - agent_id, - awareness_thread_id, - engine=engine, - agent_config_hash=knowledge_config_hash, - ) - logger.info( - f"Knowledge awareness: agent_id={agent_id}, thread_id={awareness_thread_id}, " - f"agent_collection={kb_ctx.get('agent_collection')}, " - 
f"session_collection={kb_ctx.get('session_collection')}" - ) - - if not engine: - logger.warning("Knowledge awareness skipped: engine not available") - else: - # Use draft knowledge config for search-time params when running - # in draft mode (Try-It-Out). Published agent always uses engine config. - _search_cfg = engine._config - _is_draft = agent_id and agent_id.endswith("--draft") - if _is_draft: - try: - from cuga.backend.server.main import app as _app - - _das = getattr(_app.state, "draft_app_state", None) - _draft_kc = getattr(_das, "draft_knowledge_config", None) if _das else None - if _draft_kc: - _search_cfg = _draft_kc - except Exception: - pass - knowledge_block = await get_knowledge_summary( - engine, - agent_collection=kb_ctx.get("agent_collection"), - session_collection=kb_ctx.get("session_collection"), - max_search_attempts=getattr(_search_cfg, "max_search_attempts", None) - or getattr(engine._config, "max_search_attempts", None), - default_limit=getattr(_search_cfg, "default_limit", None) - or getattr(engine._config, "default_limit", None), - rag_profile=getattr(_search_cfg, "rag_profile", None) - or getattr(engine._config, "rag_profile", "standard"), - ) - if knowledge_block: - # Load knowledge search instructions from dedicated file - knowledge_instructions_text = "" - try: - kb_instructions_path = ( - Path(__file__).parents[4] - / "configurations" - / "knowledge" - / "knowledge_instructions.md" - ) - if kb_instructions_path.exists(): - knowledge_instructions_text = kb_instructions_path.read_text( - encoding="utf-8" - ).strip() - except Exception as ki_err: - logger.debug(f"Failed to load knowledge instructions: {ki_err}") - - # Prepend knowledge block BEFORE other instructions - # so the LLM sees it early and acts on it - effective_instructions = ( - f"{knowledge_block}\n\n{knowledge_instructions_text}\n\n{effective_instructions}" - if effective_instructions - else f"{knowledge_block}\n\n{knowledge_instructions_text}" - ) - 
logger.info(f"Knowledge awareness injected: {len(knowledge_block)} chars") - except Exception as e: - logger.debug(f"Knowledge awareness injection skipped: {e}") - if lc_bind_tools_meta is not None: - lc_bind_tools_meta["_lc_bind_tools_overlay_structured_tools"] = [ - t for t in (tools_for_prompt or []) if getattr(t, "name", None) - ] - - # Create prompt dynamically - dynamic_prompt = prompt - - if not dynamic_prompt: - dynamic_prompt = create_mcp_prompt( - tools_for_prompt, - allow_user_clarification=True, - return_to_user_cases=None, - instructions=effective_instructions, - apps=apps_for_prompt, - task_loaded_from_file=task_loaded_from_file, - is_autonomous_subtask=settings.advanced_features.force_autonomous_mode - or is_autonomous_subtask, - prompt_template=base_prompt_template, - enable_find_tools=enable_find_tools, - enable_todos=enable_todos, - special_instructions=special_instructions_final, - skills_enabled=skills_enabled, - skills_prompt_section=skills_prompt_section, - enable_shell_tool=getattr(settings.advanced_features, "enable_shell_tool", False), - has_knowledge=has_knowledge_tools, - few_shot_examples=few_shot_examples, - few_shots_enabled=few_shots_enabled, - ) - logger.info( - "Prepared CugaLite prompt: enable_find_tools={} few_shot_message_turns={} " - "few_shots_as_messages={} prompt_chars={}", - enable_find_tools, - len(few_shot_examples), - bool(few_shot_examples), - len(dynamic_prompt), - ) - else: - logger.info( - "Using static CugaLite prompt; dynamic few-shot injection skipped " - "(enable_find_tools={} few_shot_turns={})", - enable_find_tools, - len(few_shot_examples), - ) - - reflection_apps_snapshot = format_apps_for_prompt(apps_for_prompt or []) - - return Command( - goto="call_model", - update={ - "tools_prepared": True, - "prepared_prompt": dynamic_prompt, - "step_count": 0, - "cuga_lite_metadata": state.cuga_lite_metadata, - "reflection_apps": reflection_apps_snapshot, - "reflection_enable_find_tools": enable_find_tools, - 
"reflection_skills_enabled": skills_enabled, - "reflection_skills_prompt_section": skills_prompt_section, - "mcp_few_shot_messages": few_shot_examples, - }, - ) - - return prepare_tools_and_apps - - # Factory function to create sandbox node with access to tools context - def create_sandbox_node(base_tools_context, base_thread_id, base_apps_list): - """Factory to create sandbox node with closure over tools context and config.""" - - async def sandbox(state: CugaLiteState, config: Optional[RunnableConfig] = None): - """Execute code in sandbox and return results.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import ToolCallTracker - - # Check if user denied approval (only if policies are enabled) - if settings.policy.enabled: - denial_command = ToolApprovalHandler.handle_denial(_LITE_LOOP_ADAPTER, state) - if denial_command: - return denial_command - - configurable = config.get("configurable", {}) if config else {} - max_steps = ( - configurable.get("cuga_lite_max_steps") if "cuga_lite_max_steps" in configurable else None - ) - if "thread_id" in configurable: - current_thread_id = configurable["thread_id"] - else: - current_thread_id = state.thread_id or base_thread_id - current_apps_list = configurable.get("apps_list", base_apps_list) - track_tool_calls = configurable.get("track_tool_calls", False) - reflection_enabled = ( - configurable.get("reflection_enabled") - if "reflection_enabled" in configurable - else settings.advanced_features.reflection_enabled - ) - - # Get existing variables using CugaLiteState's own variables_manager - existing_vars = {} - for var_name in list(state.variables_manager.get_variable_names()): - var_value = state.variables_manager.get_variable(var_name) - if is_find_tools_listing_markdown(var_value): - state.variables_manager.remove_variable(var_name) - continue - existing_vars[var_name] = var_value - - # Add tools to context - context = {**existing_vars, **base_tools_context} - - # Start tool call tracking (only if 
enabled via invoke parameter) - ToolCallTracker.start_tracking(enabled=track_tool_calls) - - try: - # Execute the script - pass the CugaLiteState itself since it has variables_manager - _exec_plan = ExecutionRouter.resolve(settings) - if _exec_plan.split_execution_active: - logger.info( - "Split execution: python=%s shell=%s fs=%s", - _exec_plan.python_backend, - _exec_plan.shell_backend, - _exec_plan.filesystem_backend, - ) - output, new_vars = await CodeExecutor.eval_with_tools_async( - code=state.script, - _locals=context, - state=state, # Pass CugaLiteState - it has variables_manager property - thread_id=current_thread_id, - apps_list=current_apps_list, - plan=_exec_plan, - ) - - tracker.collect_step(step=Step(name="User_output", data=output)) - tracker.collect_step( - step=Step( - name="User_output_variables", - data=json.dumps( - new_vars, - default=lambda o: o.model_dump() if hasattr(o, "model_dump") else str(o), - ), - ) - ) - - # Output is already formatted and trimmed by code_executor - logger.debug(f"\n\n------\n\n📝 Execution output:\n\n{output}\n\n------\n\n") - - # Update variables using CugaLiteState's variables_manager - # This automatically updates state.variables_storage - for name, value in new_vars.items(): - if is_find_tools_listing_markdown(value): - continue - state.variables_manager.add_variable( - value, name=name, description="Created during code execution" - ) - - reflection_output = "" - if reflection_enabled: - try: - active_model = configurable.get("llm") or llm_manager.get_model( - settings.agent.planner.model - ) - reflection_agent = reflection_task(llm=active_model) - # Format chat messages as history string - agent_history_parts = [] - for msg in state.chat_messages: - if isinstance(msg, HumanMessage): - agent_history_parts.append(f"User: {msg.content}") - elif isinstance(msg, AIMessage): - agent_history_parts.append(f"Assistant: {msg.content}") - else: - agent_history_parts.append( - f"{type(msg).__name__}: {getattr(msg, 'content', 
str(msg))}" - ) - agent_history = ( - "\n".join(agent_history_parts) - if agent_history_parts - else "No previous conversation history" - ) - reflection_result = await reflection_agent.ainvoke( - { - "instructions": "", - "current_task": _reflection_current_task(state) or "(no task text)", - "agent_history": agent_history, - "coder_agent_output": output, - "apps": state.reflection_apps or [], - "enable_find_tools": state.reflection_enable_find_tools, - "skills_enabled": state.reflection_skills_enabled, - "skills_prompt_section": state.reflection_skills_prompt_section, - "force_autonomous_mode": settings.advanced_features.force_autonomous_mode, - } - ) - reflection_output = reflection_result.content - logger.debug(f"Reflection output:\n{reflection_output}") - except Exception as e: - logger.warning(f"Reflection failed: {e}") - reflection_output = "" - - # Output is already formatted by code_executor - execution_message_content = execution_output_text(output) - if reflection_output: - execution_message_content = ( - f"{execution_message_content}\n\n---\n\nSummary:\n{reflection_output}" - ) - - tracker.collect_step( - step=Step( - name="User_return", - data=execution_message_content, - ) - ) - - new_message = HumanMessage(content=execution_message_content) - updated_messages, error_message = append_chat_messages_with_step_limit( - state, [new_message], max_steps=max_steps - ) - - # Collect tool calls from this execution - execution_tool_calls = ToolCallTracker.stop_tracking() - accumulated_tool_calls = (state.tool_calls or []) + execution_tool_calls - - if error_message: - return create_error_command( - updated_messages, - error_message, - state.step_count, - additional_updates={ - "variables_storage": state.variables_storage, - "variable_counter_state": state.variable_counter_state, - "variable_creation_order": state.variable_creation_order, - "tool_calls": accumulated_tool_calls, - }, - ) - - todo_state_update = extract_task_todos_from_new_vars(new_vars) - 
base_update = { - "chat_messages": updated_messages, - "variables_storage": state.variables_storage, - "variable_counter_state": state.variable_counter_state, - "variable_creation_order": state.variable_creation_order, - "step_count": state.step_count + 1, - "tool_calls": accumulated_tool_calls, - } - if todo_state_update is not None: - base_update["task_todos"] = todo_state_update - return base_update - except Exception as e: - # Collect tool calls even on error - execution_tool_calls = ToolCallTracker.stop_tracking() - accumulated_tool_calls = (state.tool_calls or []) + execution_tool_calls - - error_msg = f"Error during execution: {str(e)}" - logger.error(error_msg) - new_message = HumanMessage(content=error_msg) - updated_messages, limit_error_message = append_chat_messages_with_step_limit( - state, [new_message], max_steps=max_steps - ) - - if limit_error_message: - return create_error_command(updated_messages, limit_error_message, state.step_count) - - return { - "chat_messages": updated_messages, - "error": error_msg, - "execution_complete": True, - "step_count": state.step_count + 1, - "tool_calls": accumulated_tool_calls, - } - - return sandbox - # Mutable list shared by prepare (create_update_todos) and call_model (system prompt section). task_todos_ref: List[Dict[str, str]] = [] # Execution context: callable tools for the sandbox. Populated by prepare_node. - tools_context = {} + tools_context: Dict[str, Any] = {} # LangChain bind-tools metadata: _lc_bind_tools_* entries for resolve_model_with_bind_tools. # Kept separate so it never leaks into context_locals used by the code executor. 
lc_bind_tools_meta: Dict[str, Any] = {} - # Create node instances using factories - prepare_node = create_prepare_node( - tool_provider, - prompt_template, - instructions, - tools_context, - special_instructions, - task_todos_ref, - lc_bind_tools_meta=lc_bind_tools_meta, - ) - sandbox_node = create_sandbox_node(tools_context, thread_id, apps_list) - - # Shared call_model node via AgentGraphAdapter adapter = AgentGraphAdapter( tracker=tracker, base_callbacks=callbacks or [], task_todos_ref=task_todos_ref, tools_context_ref=lc_bind_tools_meta, base_tool_provider=tool_provider, + model=model, + prompt_template=prompt_template, + instructions=instructions, + special_instructions=special_instructions, + tools_context=tools_context, + static_prompt=prompt, + thread_id=thread_id, ) + + prepare_node = adapter.build_prepare_node(lc_bind_tools_meta) + sandbox_node = adapter.build_sandbox_node(thread_id, apps_list) call_model_node = _create_shared_call_model_node(adapter, model, settings) return build_agent_graph( diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py index 64bc1c32..e40ae630 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py @@ -297,12 +297,15 @@ def handle_denial(adapter: CoreGraphAdapter, state: Any) -> Optional[Command]: """Handle user denial of tool approval.""" if adapter.get_metadata(state).get("user_approved") is False: logger.warning("User denied tool approval - skipping execution") + meta_key = adapter.metadata_key + cleared_meta = {k: v for k, v in adapter.get_metadata(state).items() if k != "user_approved"} return Command( goto=END, update={ "execution_complete": True, "final_answer": "Execution cancelled by user.", "step_count": state.step_count + 1, + meta_key: cleared_meta, }, ) return None diff --git 
a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py index 3dee6062..914e308b 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py @@ -7,7 +7,6 @@ Uses conversational mode: Supervisor acts as a single agent with delegation tools (similar to cuga_lite). """ -import inspect from typing import Any, Dict, List, Optional, Union, Tuple from langchain_core.language_models import BaseChatModel @@ -36,29 +35,6 @@ from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface -def _resolve_names_from_caller_frame(variable_names: List[str]) -> Dict[str, Any]: - """Resolve names from the delegated code's caller frame. - - LocalExecutor injects supervisor context into ``_async_main``'s globals; only - using ``f_locals`` missed those bindings, so sub-agents received no variables - and tasks showed e.g. ``amount=None``. 
- """ - resolved: Dict[str, Any] = {} - frame = inspect.currentframe() - try: - caller = frame.f_back if frame is not None else None - if caller is None: - return resolved - for name in variable_names: - if name in caller.f_locals: - resolved[name] = caller.f_locals[name] - elif name in caller.f_globals: - resolved[name] = caller.f_globals[name] - finally: - del frame - return resolved - - class _CugaSupervisorLoopAdapter(CoreGraphAdapter): """Supervisor seam: messages live on ``supervisor_chat_messages`` (None-safe); step limit from ``state.cuga_lite_max_steps`` else diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py index 52b1495d..aefe4a54 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py @@ -338,7 +338,7 @@ async def prepare_agents_and_prompt( } agent_tools_for_prompt.append(tool_info) - from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import create_update_todos_tool + from cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter import create_update_todos_tool from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import make_tool_awaitable todos_tool = await create_update_todos_tool() @@ -473,10 +473,10 @@ async def execute_agent_tool(state: CugaSupervisorState, config: Optional[Runnab existing_vars = {} var_manager = adapter.get_variable_manager(state) - for var_name in var_manager.get_variable_names(): - existing_vars[var_name] = var_manager.get_variable(var_name) - - adapter._shared_vm_ref[0] = var_manager + if var_manager is not None: + for var_name in var_manager.get_variable_names(): + existing_vars[var_name] = var_manager.get_variable(var_name) + adapter._shared_vm_ref[0] = var_manager context = {**existing_vars, **adapter._agent_tools_context} @@ -501,10 +501,11 @@ async def 
execute_agent_tool(state: CugaSupervisorState, config: Optional[Runnab logger.debug(f"Execution output: {output.strip()[:500]}...") - for name, value in new_vars.items(): - var_manager.add_variable( - value, name=name, description="Created during agent delegation execution" - ) + if var_manager is not None: + for name, value in new_vars.items(): + var_manager.add_variable( + value, name=name, description="Created during agent delegation execution" + ) execution_message_content = execution_output_text(output) new_message = HumanMessage(content=execution_message_content) From 6636d26eb268ec76fa77820fd5f4d61692fb3a25 Mon Sep 17 00:00:00 2001 From: Sami Marreed Date: Thu, 21 May 2026 17:19:00 +0300 Subject: [PATCH 6/7] fix: draft agents.md and add more fixes --- src/cuga/__init__.py | 2 +- src/cuga/backend/cuga_graph/graph.py | 4 +- src/cuga/backend/cuga_graph/nodes/AGENTS.md | 112 +++ .../nodes/chat/chat_agent/chat_agent.py | 2 +- .../cuga_agent_core/execution/__init__.py | 0 .../{ => execution}/code_extraction.py | 4 +- .../nodes/cuga_agent_core/execution/todos.py | 166 ++++ .../{ => execution}/variable_bridge.py | 0 .../nodes/cuga_agent_core/graph/__init__.py | 0 .../{ => graph}/graph_nodes.py | 0 .../{ => graph}/shared_graph.py | 2 +- .../{ => graph}/shared_nodes.py | 7 +- .../nodes/cuga_agent_core/policy/__init__.py | 0 .../{ => policy}/execution_policy.py | 0 .../tests/execution/__init__.py | 0 .../{ => execution}/test_code_extraction.py | 2 +- .../{ => execution}/test_variable_bridge.py | 16 +- .../cuga_agent_core/tests/graph/__init__.py | 0 .../{ => graph}/test_graph_adapter_hooks.py | 2 +- .../tests/{ => graph}/test_graph_nodes.py | 2 +- .../{ => graph}/test_shared_call_model.py | 18 +- .../{ => graph}/test_shared_graph_builder.py | 4 +- .../test_supervisor_feature_parity.py | 10 +- .../cuga_agent_core/tests/policy/__init__.py | 0 .../{ => policy}/test_execution_policy.py | 2 +- .../test_policy_enactment_adapter.py | 2 +- .../test_tool_approval_adapter.py | 2 
+- .../cuga_agent_core/tests/tools/__init__.py | 0 .../tests/{ => tools}/test_runtime_tools.py | 4 +- .../test_supervisor_tool_provider.py | 6 +- .../nodes/cuga_agent_core/tools/__init__.py | 0 .../{ => tools}/runtime_tools.py | 2 +- .../cuga_graph/nodes/cuga_lite/__init__.py | 22 +- .../nodes/cuga_lite/agent_graph_adapter.py | 742 +----------------- .../nodes/cuga_lite/cuga_lite_graph.py | 13 +- .../cuga_lite/executors/code_executor.py | 2 +- .../executors/common/call_api_helper.py | 2 +- .../tests/test_execution_plan_wiring.py | 2 +- .../tests/test_extract_codeblocks.py | 4 +- .../executors/tests/test_sync_async_tools.py | 2 +- .../executors/tests/test_tool_call_timeout.py | 4 +- .../nodes/cuga_lite/helpers/__init__.py | 4 + .../nodes/cuga_lite/helpers/bind_tools.py | 340 ++++++++ .../nodes/cuga_lite/helpers/find_tools.py | 156 ++++ .../nodes/cuga_lite/helpers/knowledge.py | 59 ++ .../nodes/cuga_lite/prompt_utils.py | 12 +- .../nodes/cuga_lite/providers/__init__.py | 4 + .../base.py} | 23 +- .../combined.py} | 111 ++- .../langchain.py} | 42 +- .../registry.py} | 102 ++- ...st_tool_call_args.py => test_arguments.py} | 2 +- .../test_cuga_lite_graph_evolve_guidelines.py | 4 +- .../nodes/cuga_lite/tool_approval_handler.py | 4 +- .../nodes/cuga_lite/tracking/__init__.py | 4 + .../arguments.py} | 0 .../tracker.py} | 26 +- .../cuga_supervisor/cuga_supervisor_graph.py | 8 +- .../cuga_supervisor/cuga_supervisor_state.py | 5 +- .../supervisor_graph_adapter.py | 18 +- .../cuga_graph/policy/tests/helpers.py | 2 +- .../test_e2e_healthcare_family_claims.py | 2 +- .../policy/tests/test_e2e_output_formatter.py | 2 +- .../tests/test_e2e_playbook_guidance.py | 2 +- .../tests/test_e2e_playbook_refinement.py | 2 +- .../policy/tests/test_e2e_tool_enrichment.py | 2 +- .../tests/test_tool_approval_full_graph.py | 2 +- src/cuga/backend/server/main.py | 2 +- src/cuga/sdk.py | 8 +- .../sdk_core/tests/test_sdk_integration.py | 4 +- .../supervisor_utils/supervisor_config.py | 4 +- 
tests/integration/test_llm_config_publish.py | 2 +- tests/integration/test_tool_call_tracking.py | 24 +- tests/system/test_manager_api_integration.py | 2 +- tests/unit/test_cuga_lite_bind_tools.py | 2 +- tests/unit/test_cuga_lite_knowledge_scopes.py | 2 +- tests/unit/test_find_tools_exception.py | 14 +- 77 files changed, 1162 insertions(+), 1004 deletions(-) create mode 100644 src/cuga/backend/cuga_graph/nodes/AGENTS.md create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/__init__.py rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/{ => execution}/code_extraction.py (96%) create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/todos.py rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/{ => execution}/variable_bridge.py (100%) create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/__init__.py rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/{ => graph}/graph_nodes.py (100%) rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/{ => graph}/shared_graph.py (95%) rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/{ => graph}/shared_nodes.py (97%) create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/policy/__init__.py rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/{ => policy}/execution_policy.py (100%) create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/execution/__init__.py rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => execution}/test_code_extraction.py (97%) rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => execution}/test_variable_bridge.py (86%) create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/__init__.py rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => graph}/test_graph_adapter_hooks.py (98%) rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => graph}/test_graph_nodes.py (99%) rename 
src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => graph}/test_shared_call_model.py (92%) rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => graph}/test_shared_graph_builder.py (96%) rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => graph}/test_supervisor_feature_parity.py (95%) create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/__init__.py rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => policy}/test_execution_policy.py (99%) rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => policy}/test_policy_enactment_adapter.py (97%) rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => policy}/test_tool_approval_adapter.py (98%) create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/tools/__init__.py rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => tools}/test_runtime_tools.py (98%) rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/{ => tools}/test_supervisor_tool_provider.py (94%) create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tools/__init__.py rename src/cuga/backend/cuga_graph/nodes/cuga_agent_core/{ => tools}/runtime_tools.py (98%) create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/__init__.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/bind_tools.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/find_tools.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/knowledge.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/__init__.py rename src/cuga/backend/cuga_graph/nodes/cuga_lite/{tool_provider_interface.py => providers/base.py} (78%) rename src/cuga/backend/cuga_graph/nodes/cuga_lite/{combined_tool_provider.py => providers/combined.py} (84%) rename src/cuga/backend/cuga_graph/nodes/cuga_lite/{direct_langchain_tools_provider.py => providers/langchain.py} 
(82%) rename src/cuga/backend/cuga_graph/nodes/cuga_lite/{tool_registry_provider.py => providers/registry.py} (82%) rename src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/{test_tool_call_args.py => test_arguments.py} (91%) create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/tracking/__init__.py rename src/cuga/backend/cuga_graph/nodes/cuga_lite/{tool_call_args.py => tracking/arguments.py} (100%) rename src/cuga/backend/cuga_graph/nodes/cuga_lite/{tool_call_tracker.py => tracking/tracker.py} (93%) diff --git a/src/cuga/__init__.py b/src/cuga/__init__.py index 9d7e18f0..49229049 100644 --- a/src/cuga/__init__.py +++ b/src/cuga/__init__.py @@ -24,7 +24,7 @@ def get_weather(city: str) -> str: """ from cuga.sdk import CugaAgent, CugaSupervisor, run_agent, InvokeResult -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import tracked_tool +from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import tracked_tool from cuga.backend.knowledge import KnowledgeClient, KnowledgeEngine from cuga.backend.knowledge.config import KnowledgeConfig diff --git a/src/cuga/backend/cuga_graph/graph.py b/src/cuga/backend/cuga_graph/graph.py index f55cfedb..7f2d670f 100644 --- a/src/cuga/backend/cuga_graph/graph.py +++ b/src/cuga/backend/cuga_graph/graph.py @@ -47,8 +47,8 @@ from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import ( create_cuga_lite_graph, ) -from cuga.backend.cuga_graph.nodes.cuga_lite.combined_tool_provider import CombinedToolProvider -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.combined import CombinedToolProvider +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_node import CugaSupervisorNode from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_graph import ( create_cuga_supervisor_graph, diff 
--git a/src/cuga/backend/cuga_graph/nodes/AGENTS.md b/src/cuga/backend/cuga_graph/nodes/AGENTS.md new file mode 100644 index 00000000..932612a2 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/AGENTS.md @@ -0,0 +1,112 @@ +# Agent Architecture & Contribution Guide (`AGENTS.md`) + +Welcome to the **Cuga Agent Nodes** directory. This directory is the core of Cuga's agentic reasoning and execution architecture. It houses the workflows, state-graphs, tool providers, and execution environments that power our lightweight single-agent (`CugaLite`) and multi-agent orchestrator (`CugaSupervisor`) systems. + +This document describes the high-level architecture of this directory, coding standards (such as keeping files under 1,000 lines), and instructions on how to contribute modularly. + +--- + +## 1. Directory Structure & Key Components + +The directory is structured to separate shared core machinery, agent graph orchestrators, and specialized task agents. + +``` +src/cuga/backend/cuga_graph/nodes/ +├── cuga_agent_core/ # Shared core adapter-seam engine & execution utilities +│ ├── graph/ # Node builders and adapter interfaces (e.g., CoreGraphAdapter) +│ ├── execution/ # Code extraction, variable bridges, and todo tracking +│ ├── tools/ # Runtime shell/filesystem tool injection orchestrators +│ └── policy/ # Routing environments (local vs. sandboxed execution) +│ +├── cuga_lite/ # Optimized single-agent graph (LangGraph) with code-execution loops +│ ├── helpers/ # Helper tools (knowledge checks, bind_tools, find_tools) +│ ├── providers/ # Modular tool registrations and direct LangChain adapters +│ ├── tracking/ # Tool-call tracking and execution logging +│ └── executors/ # Execution runtimes (local, native, OpenSandbox, Docker, E2B) +│ +├── cuga_supervisor/ # Orchestrator subgraph that delegates tasks to downstream agents +│ └── a2a_protocol.py # Agent-to-Agent (A2A) protocol & sub-task dispatching +│ +├── api/ # Specialized API-interaction code and planning agents +├── browser/ 
# Specialized UI/browser automation and planning agents +├── chat/ # Conversational agents +├── task_decomposition_planning/ # Pre-flight request analyzer and breakdown planning +├── answer/ # Synthesis agents producing clean user-visible answers +├── save_reuse/ # Skill recording and reuse agents +└── shared/ # Base agent abstractions +``` + +--- + +## 2. Core Architectural Philosophy: The Adapter Pattern + +To prevent code duplication and drift between the single-agent (`CugaLite`) and multi-agent supervisor (`CugaSupervisor`) graphs, we isolate graph-specific behavior behind the **Adapter Pattern**: + +1. **`CoreGraphAdapter` (Abstract Base Class)**: Lives in `cuga_agent_core/graph/graph_nodes.py`. It defines abstract hooks for message handling, step-limit resolution, personal-instruction injection, metadata management, and post-invocation side-effects. +2. **`build_agent_graph`**: Lives in `cuga_agent_core/graph/shared_graph.py`. It builds a canonical 3-node LangGraph structure: + ``` + START ──> prepare ──> call_model <──> execute (sandbox / tool loop) ──> END + ``` + The builder is 100% graph-agnostic; it is parameterized entirely by the adapter and nodes provided at construction time. +3. **`AgentGraphAdapter` & `SupervisorGraphAdapter`**: Concrete implementations that customize the execution loop for single-agent coding loops or supervisor delegation loops, respectively. + +--- + +## 3. Strict Contribution Standards: Code Modularity & Line Limits + +To ensure high maintainability, readability, and ease of testing, all contributions must strictly adhere to the following code organization rules: + +### ⚠️ The Under-1000-Line Rule +* **Wherever possible, no single file in this directory should exceed 1,000 lines of code.** +* Large monolithic files are a severe anti-pattern. They complicate unit testing, increase the likelihood of merge conflicts, and obscure architectural seams. 
+* If a module or adapter begins to approach or exceed this limit, it is **mandatory** to refactor and extract cohesive sub-components into sub-packages. + +### 🧩 Phased Modularization & Sub-packages +When extending logic, do not append code to existing root-level files. Instead: +1. **Extract Helpers into Sub-directories**: + Create a dedicated sub-folder (e.g., `helpers/`, `providers/`, `tracking/`) with an `__init__.py` to organize related files cleanly. +2. **Expose Interfaces via Package Exports**: + Re-export public names from each package's `__init__.py` to keep imports elsewhere in the application clean (e.g., importing from `cuga_lite.providers` rather than raw file paths). +3. **Minimize Cross-Module Coupling**: + Ensure downstream layers (like `cuga_supervisor`) never depend on the internal helper files of upstream layers (like `cuga_lite`). Shared schemas (like `todos.py`) must live in `cuga_agent_core`. + +--- + +## 4. How to Contribute & Reorganize Code + +When adding a new feature, optimizing an execution node, or restructuring files, follow this sequence of steps to maintain system stability: + +### Step 1: Design Clean Interfaces +If adding behavior, check if it fits within the `CoreGraphAdapter` hook system. If it does, add a default no-op implementation to `CoreGraphAdapter` and override it only in the target concrete subclass. + +### Step 2: Extract and Isolate +Write lightweight, highly specialized modules, each with a single responsibility. +For example, instead of writing database-access logic directly inside a node: +* Define a base interface class. +* Implement a concrete handler class in a `providers/` sub-package. +* Bind the concrete handler at graph assembly time. + +### Step 3: Run the Code Quality Pipeline +We use `ruff` for extremely fast linting and formatting. 
Run these commands before committing any Python changes: +```bash +# Auto-format files to adhere to project standards +uv run ruff format src/cuga/backend/cuga_graph/nodes/ + +# Perform lint checks and catch potential errors +uv run ruff check src/cuga/backend/cuga_graph/nodes/ +``` + +### Step 4: Run the Unit & Integration Tests +Ensure that your changes do not introduce regressions: +```bash +# Run unit tests for the core engine +uv run pytest src/cuga/backend/cuga_graph/nodes/cuga_agent_core/ + +# Run unit tests for Cuga Lite +uv run pytest src/cuga/backend/cuga_graph/nodes/cuga_lite/ + +# Run the complete test suite +uv run pytest +``` + +Always write new unit tests in the corresponding `tests/` sub-folder (e.g. `cuga_agent_core/tests/graph/`, `cuga_agent_core/tests/execution/`) to verify all new branches and components. diff --git a/src/cuga/backend/cuga_graph/nodes/chat/chat_agent/chat_agent.py b/src/cuga/backend/cuga_graph/nodes/chat/chat_agent/chat_agent.py index 7093e591..83a20ccc 100644 --- a/src/cuga/backend/cuga_graph/nodes/chat/chat_agent/chat_agent.py +++ b/src/cuga/backend/cuga_graph/nodes/chat/chat_agent/chat_agent.py @@ -14,7 +14,7 @@ from mcp import ClientSession from cuga.backend.cuga_graph.nodes.shared.base_agent import BaseAgent -from cuga.backend.cuga_graph.nodes.cuga_lite.combined_tool_provider import CombinedToolProvider +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.combined import CombinedToolProvider from cuga.backend.cuga_graph.state.agent_state import AgentState from cuga.backend.cuga_graph.utils.context_management_utils import apply_context_summarization diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/code_extraction.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/code_extraction.py similarity 
index 96% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/code_extraction.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/code_extraction.py index 88be6caa..c1087555 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/code_extraction.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/code_extraction.py @@ -17,7 +17,7 @@ from pydantic import BaseModel -BACKTICK_PATTERN = r'```python(.*?)```' +BACKTICK_PATTERN = r"```python(.*?)```" def extract_and_combine_codeblocks(text: str) -> str: @@ -39,7 +39,7 @@ def extract_and_combine_codeblocks(text: str) -> str: return "" try: - compile(stripped_text.replace('await ', ''), '', 'exec') + compile(stripped_text.replace("await ", ""), "", "exec") return stripped_text except SyntaxError: return "" diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/todos.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/todos.py new file mode 100644 index 00000000..6d42caa0 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/todos.py @@ -0,0 +1,166 @@ +"""Task todos schemas, formatting, and tool shared across all agents.""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from langchain_core.tools import StructuredTool +from pydantic import BaseModel, Field + + +class Todo(BaseModel): + """A single todo item with text and status.""" + + text: str = Field(..., description="The task description") + status: str = Field( + default="pending", + description="Status of the todo: 'pending', 'in_progress', or 'completed'", + ) + + +class TodosInput(BaseModel): + """Input schema for create_update_todos function.""" + + todos: List[Todo] = Field(..., description="List of todos, each with 'text' and 'status' fields") + + +class TodosOutput(BaseModel): + """Output schema for create_update_todos function.""" + + todos: List[Todo] = Field(..., description="List of todos with their current 
status") + + +def _try_parse_todos_payload(value: Any) -> Optional[List[Dict[str, Any]]]: + if not isinstance(value, dict) or "todos" not in value: + return None + raw = value["todos"] + if not isinstance(raw, list): + return None + if not raw: + return [] + if not all(isinstance(x, dict) and "text" in x and "status" in x for x in raw): + return None + return raw + + +def extract_task_todos_from_new_vars(new_vars: dict) -> Optional[List[Dict[str, Any]]]: + for val in new_vars.values(): + parsed = _try_parse_todos_payload(val) + if parsed is not None: + return parsed + return None + + +def _serialize_todos_for_store(todos_list: List[Any]) -> List[Dict[str, str]]: + out: List[Dict[str, str]] = [] + for t in todos_list: + if isinstance(t, Todo): + out.append({"text": t.text, "status": t.status}) + elif hasattr(t, "model_dump"): + d = t.model_dump() + out.append({"text": str(d.get("text", "")), "status": str(d.get("status", "pending"))}) + elif isinstance(t, dict): + out.append({"text": str(t.get("text", "")), "status": str(t.get("status", "pending"))}) + else: + out.append({"text": str(t), "status": "pending"}) + return out + + +async def create_update_todos_tool( + agent_state: Optional[Any] = None, + todos_store_ref: Optional[List[Dict[str, str]]] = None, +) -> StructuredTool: + """Create a create_update_todos StructuredTool for managing task todos. + + Args: + agent_state: Optional AgentState (reserved for future use) + todos_store_ref: Mutable list shared with the graph; latest todos are written here for the system prompt. + + Returns: + StructuredTool configured for creating and updating todos + """ + + async def create_update_todos_func(todos: Any) -> TodosOutput: + """Create or update a list of todos for complex multi-step tasks. + + Use this tool when you have a complex task that requires multiple steps. + This helps you track progress and organize your work. + + Args: + todos: List of todo dicts/models (matches ``TodosInput.todos`` / tool schema). 
+ + Returns: + Short confirmation only (full list is shown in the system prompt via todos_store_ref). + """ + input_data = todos + # Handle different input types + if isinstance(input_data, TodosInput): + todos_list = input_data.todos + elif isinstance(input_data, dict): + # If it's a dict, check if it has 'todos' key + if "todos" in input_data: + todos_list = input_data["todos"] + else: + # If no 'todos' key, treat the whole dict as a single todo or wrap it + todos_list = [input_data] + # Convert dict items to Todo models + todos_list = [Todo(**todo) if isinstance(todo, dict) else todo for todo in todos_list] + elif isinstance(input_data, list): + # If it's a list directly, convert each item to Todo + todos_list = [Todo(**todo) if isinstance(todo, dict) else todo for todo in input_data] + else: + # Fallback: try to create TodosInput + try: + if isinstance(input_data, dict): + input_data = TodosInput(**input_data) + else: + input_data = TodosInput(todos=input_data) + todos_list = input_data.todos + except Exception: + # Last resort: wrap in a list + todos_list = [Todo(**input_data) if isinstance(input_data, dict) else input_data] + + if todos_store_ref is not None: + serialized = _serialize_todos_for_store(todos_list) + todos_store_ref.clear() + todos_store_ref.extend(serialized) + + normalized = [t if isinstance(t, Todo) else Todo(**t) for t in todos_list] + return TodosOutput(todos=normalized) + + return StructuredTool.from_function( + func=create_update_todos_func, + name="create_update_todos", + description="Create or update a list of todos for complex multi-step tasks. Pass `todos` as a list of objects with 'text' and 'status' ('pending', 'in_progress', or 'completed'). 
Returns a todos payload; the full list is shown in the system prompt under 'Current task todos' (Current Plan).", + args_schema=TodosInput, + return_direct=False, + ) + + +def format_task_todos_system_block(todos: List[Dict[str, str]]) -> str: + if not todos: + return "" + lines = [ + "", + "---", + "", + "## Current task todos", + "", + "Execution only prints **Todos updated** after each change; use this list as the source of truth.", + "", + ] + for i, item in enumerate(todos, start=1): + status = item.get("status", "pending") + text = item.get("text", "") + lines.append(f"{i}. **[{status}]** {text}") + lines.append("") + return "\n".join(lines) + + +def format_current_plan_section(task_todos: List[Dict[str, Any]]) -> str: + lines = ["## Current Plan", ""] + for item in task_todos: + text = str(item.get("text", "")).strip() + status = str(item.get("status", "pending")).strip() + lines.append(f"- **[{status}]** {text}") + return "\n".join(lines) + "\n" diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/variable_bridge.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/variable_bridge.py similarity index 100% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/variable_bridge.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution/variable_bridge.py diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph_nodes.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/graph_nodes.py similarity index 100% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph_nodes.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/graph_nodes.py diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_graph.py 
b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/shared_graph.py similarity index 95% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_graph.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/shared_graph.py index 9d1110fe..c367e555 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_graph.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/shared_graph.py @@ -19,7 +19,7 @@ from langgraph.graph import START, StateGraph -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import CoreGraphAdapter def build_agent_graph( diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_nodes.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/shared_nodes.py similarity index 97% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_nodes.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/shared_nodes.py index d96b27b0..70324c9b 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/shared_nodes.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/shared_nodes.py @@ -24,14 +24,15 @@ from typing import Any, Callable, Optional from langchain_core.messages import AIMessage, HumanMessage +from langchain_core.runnables import RunnableConfig from langgraph.graph import END from langgraph.types import Command from loguru import logger -from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import ( extract_code_from_model_response, ) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( CoreGraphAdapter, enforce_step_limit, ) @@ -53,7 +54,7 @@ def create_call_model_node( settings: Application settings object (policy, advanced_features, …). 
""" - async def call_model(state: Any, config: Any = None) -> Command: + async def call_model(state: Any, config: RunnableConfig | None = None) -> Command: configurable: dict = config.get("configurable", {}) if config else {} # ── Tool-approval HITL resumption ────────────────────────────────── diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/policy/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/policy/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution_policy.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/policy/execution_policy.py similarity index 100% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/execution_policy.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/policy/execution_policy.py diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/execution/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/execution/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_code_extraction.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/execution/test_code_extraction.py similarity index 97% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_code_extraction.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/execution/test_code_extraction.py index 7294a1ae..db0a7ea3 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_code_extraction.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/execution/test_code_extraction.py @@ -19,7 +19,7 @@ from pydantic import BaseModel -from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import ( extract_and_combine_codeblocks, extract_code_from_model_response, make_tool_awaitable, diff --git 
a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_variable_bridge.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/execution/test_variable_bridge.py similarity index 86% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_variable_bridge.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/execution/test_variable_bridge.py index 655adcad..70550455 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_variable_bridge.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/execution/test_variable_bridge.py @@ -21,7 +21,7 @@ def test_extract_values_returns_name_value_dict(): """extract_values strips metadata, leaving {name: raw_value}.""" - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge storage = { "amount": {"value": 99, "description": "desc", "type": "int", "created_at": "...", "count_items": 0}, @@ -33,7 +33,7 @@ def test_extract_values_returns_name_value_dict(): def test_extract_values_skips_entries_without_value_key(): """Malformed storage entries (no 'value' key) are silently skipped.""" - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge storage = { "good": {"value": 42, "type": "int"}, @@ -45,14 +45,14 @@ def test_extract_values_skips_entries_without_value_key(): def test_extract_values_empty_storage_returns_empty(): - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge assert VariableBridge.extract_values({}) == {} def test_bridge_copies_values_into_target_manager(): """bridge() writes each value into the target VariablesManager under its name.""" - from 
cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge target = VariablesManager() bridged = VariableBridge.bridge({"x": 10, "y": "hello"}, target) @@ -65,7 +65,7 @@ def test_bridge_copies_values_into_target_manager(): def test_bridge_empty_source_returns_empty_list_and_leaves_manager_unchanged(): - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge target = VariablesManager() target.add_variable(1, name="pre_existing") @@ -77,7 +77,7 @@ def test_bridge_empty_source_returns_empty_list_and_leaves_manager_unchanged(): def test_bridge_description_prefix_is_applied(): """bridge() stores variables with the given description prefix.""" - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge target = VariablesManager() VariableBridge.bridge({"result": 7}, target, description_prefix="from customer_agent") @@ -118,7 +118,7 @@ def test_shared_vm_ref_allows_delegate_to_write_into_supervisor_vm(): - delegate_to_agent reads _shared_vm_ref[0] and calls VariableBridge.bridge(...) 
- The target VM accumulates the bridged variables """ - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge _shared_vm_ref: List[Any] = [None] @@ -137,7 +137,7 @@ def test_shared_vm_ref_allows_delegate_to_write_into_supervisor_vm(): def test_shared_vm_ref_none_skips_bridge_gracefully(): """When _shared_vm_ref[0] is None (before first execute), bridge is skipped.""" - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge _shared_vm_ref: List[Any] = [None] sub_agent_vars = {"result": "ok"} diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_adapter_hooks.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_graph_adapter_hooks.py similarity index 98% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_adapter_hooks.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_graph_adapter_hooks.py index ce6702fe..7836a3a5 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_adapter_hooks.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_graph_adapter_hooks.py @@ -19,7 +19,7 @@ import pytest from langchain_core.messages import BaseMessage, HumanMessage -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import CoreGraphAdapter # ── Minimal concrete adapter (only satisfies abstract methods) ───────────── diff --git 
a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_nodes.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_graph_nodes.py similarity index 99% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_nodes.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_graph_nodes.py index 12f59319..716d4a58 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_graph_nodes.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_graph_nodes.py @@ -19,7 +19,7 @@ from langchain_core.messages import AIMessage, HumanMessage from langgraph.graph import END -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( CoreGraphAdapter, append_chat_messages_with_step_limit, create_error_command, diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_call_model.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_shared_call_model.py similarity index 92% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_call_model.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_shared_call_model.py index e721ec37..a8901c4a 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_call_model.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_shared_call_model.py @@ -23,7 +23,7 @@ from langgraph.graph import END from langgraph.types import Command -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import CoreGraphAdapter # ── Shared test adapter ──────────────────────────────────────────────────── @@ -87,7 +87,7 @@ def _mock_settings(policy_enabled=False): # The factory under test — imported lazily so we see ImportError (RED) 
clearly. def _get_factory(): - from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes import ( + from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_nodes import ( create_call_model_node, ) @@ -99,7 +99,7 @@ def _get_factory(): @pytest.mark.asyncio @patch( - "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + "cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_nodes.apply_context_summarization", new_callable=AsyncMock, ) async def test_code_path_routes_to_execute_node(mock_summarize): @@ -125,7 +125,7 @@ async def test_code_path_routes_to_execute_node(mock_summarize): @pytest.mark.asyncio @patch( - "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + "cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_nodes.apply_context_summarization", new_callable=AsyncMock, ) async def test_no_code_path_routes_to_end(mock_summarize): @@ -151,7 +151,7 @@ async def test_no_code_path_routes_to_end(mock_summarize): @pytest.mark.asyncio @patch( - "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + "cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_nodes.apply_context_summarization", new_callable=AsyncMock, ) async def test_step_limit_in_code_path_routes_to_end_with_error(mock_summarize): @@ -178,7 +178,7 @@ async def test_step_limit_in_code_path_routes_to_end_with_error(mock_summarize): @pytest.mark.asyncio @patch( - "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + "cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_nodes.apply_context_summarization", new_callable=AsyncMock, ) async def test_step_limit_in_no_code_path_routes_to_end_with_error(mock_summarize): @@ -211,7 +211,7 @@ async def classify_auto_continue( @pytest.mark.asyncio @patch( - "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + 
"cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_nodes.apply_context_summarization", new_callable=AsyncMock, ) async def test_auto_continue_loops_back_to_call_model(mock_summarize): @@ -246,7 +246,7 @@ def get_messages(self, state: Any) -> List[BaseMessage]: @pytest.mark.asyncio @patch( - "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + "cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_nodes.apply_context_summarization", new_callable=AsyncMock, ) async def test_adapter_messages_key_and_execute_node_name_respected(mock_summarize): @@ -280,7 +280,7 @@ async def test_adapter_messages_key_and_execute_node_name_respected(mock_summari @pytest.mark.asyncio @patch( - "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + "cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_nodes.apply_context_summarization", new_callable=AsyncMock, ) async def test_configurable_llm_overrides_base_model(mock_summarize): diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_graph_builder.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_shared_graph_builder.py similarity index 96% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_graph_builder.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_shared_graph_builder.py index 8a2dbb52..be8d71aa 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_shared_graph_builder.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_shared_graph_builder.py @@ -15,7 +15,7 @@ from langchain_core.messages import BaseMessage from pydantic import BaseModel -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import CoreGraphAdapter # ── Minimal adapter & state for graph construction 
───────────────────────── @@ -56,7 +56,7 @@ async def node(state, config=None): def _get_builder(): - from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_graph import build_agent_graph + from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_graph import build_agent_graph return build_agent_graph diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_feature_parity.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_supervisor_feature_parity.py similarity index 95% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_feature_parity.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_supervisor_feature_parity.py index e2915c4c..4e8236de 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_feature_parity.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/graph/test_supervisor_feature_parity.py @@ -85,7 +85,7 @@ def test_invoke_result_variables_round_trips_via_extract_values(): This pins the contract between CugaAgent.invoke() and the bridge wiring. 
""" - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge from cuga.sdk import InvokeResult # Simulate what the graph result dict contains @@ -106,7 +106,7 @@ def test_invoke_result_variables_round_trips_via_extract_values(): def test_invoke_result_variables_empty_storage_gives_empty_dict(): """Empty variables_storage → InvokeResult.variables == {}.""" - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge from cuga.sdk import InvokeResult result = InvokeResult(answer="done", variables=VariableBridge.extract_values({})) @@ -119,7 +119,7 @@ def test_invoke_result_variables_empty_storage_gives_empty_dict(): def test_split_note_absent_means_join_skips_it(): """When split execution is not active, split_execution_note returns '', and the join correctly omits it.""" - from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ( + from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ( ExecutionPlan, split_execution_note, ) @@ -138,7 +138,7 @@ def test_split_note_absent_means_join_skips_it(): def test_split_note_present_means_join_includes_it(): """When split execution is active, the note is non-empty and joins.""" - from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ( + from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ( ExecutionPlan, split_execution_note, ) @@ -168,7 +168,7 @@ def test_bridge_skips_none_values(): placeholders. 
""" from cuga.backend.cuga_graph.state.agent_state import VariablesManager - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge target = VariablesManager() bridged = VariableBridge.bridge({"good": 42, "bad": None}, target) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_execution_policy.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_execution_policy.py similarity index 99% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_execution_policy.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_execution_policy.py index 9f7fd858..8b9ab14c 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_execution_policy.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_execution_policy.py @@ -12,7 +12,7 @@ from types import SimpleNamespace -from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ( ExecutionPlan, ExecutionRouter, split_execution_note, diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_policy_enactment_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_policy_enactment_adapter.py similarity index 97% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_policy_enactment_adapter.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_policy_enactment_adapter.py index 7a42fa91..887e36ec 100644 --- 
a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_policy_enactment_adapter.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_policy_enactment_adapter.py @@ -17,7 +17,7 @@ from langchain_core.messages import AIMessage, HumanMessage from langgraph.graph import END -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import CoreGraphAdapter from cuga.backend.cuga_graph.policy.enactment import PolicyEnactment diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_tool_approval_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_tool_approval_adapter.py similarity index 98% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_tool_approval_adapter.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_tool_approval_adapter.py index fa7b657a..f30f9d7f 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_tool_approval_adapter.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_tool_approval_adapter.py @@ -14,7 +14,7 @@ from langchain_core.messages import AIMessage, HumanMessage from langgraph.graph import END -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import CoreGraphAdapter from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/tools/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_runtime_tools.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/tools/test_runtime_tools.py similarity index 98% rename from 
src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_runtime_tools.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/tools/test_runtime_tools.py index 6e07e4e1..b2a77498 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_runtime_tools.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/tools/test_runtime_tools.py @@ -18,7 +18,7 @@ import pytest -from cuga.backend.cuga_graph.nodes.cuga_agent_core.runtime_tools import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.tools.runtime_tools import ( RuntimeBackends, ToolBundle, build_runtime_tools, @@ -275,7 +275,7 @@ async def test_skill_tool_func_is_awaitable_via_make_tool_awaitable(): Supervisor's execute_agent_tool can ``await load_skill(...)`` in code blocks.""" from cuga.backend.skills.registry import SkillEntry, SkillRegistry from cuga.backend.skills.tools import create_skill_tools - from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import make_tool_awaitable + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import make_tool_awaitable entry = SkillEntry(name="my_skill", description="Test.", body="instructions", source="t") registry = SkillRegistry([entry]) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_tool_provider.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/tools/test_supervisor_tool_provider.py similarity index 94% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_tool_provider.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/tools/test_supervisor_tool_provider.py index e0b38dba..0bfc3bb5 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/test_supervisor_tool_provider.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/tools/test_supervisor_tool_provider.py @@ -22,9 +22,9 @@ import pytest from langchain_core.tools import StructuredTool -from 
cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import make_tool_awaitable -from cuga.backend.cuga_graph.nodes.cuga_agent_core.runtime_tools import prompt_tool_dicts -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import make_tool_awaitable +from cuga.backend.cuga_graph.nodes.cuga_agent_core.tools.runtime_tools import prompt_tool_dicts +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface # ── helpers ──────────────────────────────────────────────────────────────── diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tools/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/runtime_tools.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tools/runtime_tools.py similarity index 98% rename from src/cuga/backend/cuga_graph/nodes/cuga_agent_core/runtime_tools.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tools/runtime_tools.py index e0ac4672..0591bffc 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/runtime_tools.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tools/runtime_tools.py @@ -20,7 +20,7 @@ from loguru import logger -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import AppDefinition +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import AppDefinition FilesystemChoice = Literal["none", "host", "sandbox_remote"] ShellChoice = Literal["none", "local", "native", "opensandbox"] diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/__init__.py index 5945db17..998214cc 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/__init__.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/__init__.py @@ 
-1,7 +1,7 @@ # Re-export commonly used classes and functions -from cuga.backend.cuga_graph.nodes.cuga_lite.combined_tool_provider import CombinedToolProvider -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_registry_provider import ToolRegistryProvider -from cuga.backend.cuga_graph.nodes.cuga_lite.direct_langchain_tools_provider import ( +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.combined import CombinedToolProvider +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.registry import ToolRegistryProvider +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.langchain import ( DirectLangChainToolsProvider, ) from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import ( @@ -13,12 +13,12 @@ from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor __all__ = [ - 'CombinedToolProvider', - 'ToolRegistryProvider', - 'DirectLangChainToolsProvider', - 'create_mcp_prompt', - 'normalize_mcp_few_shot_examples', - 'resolve_cuga_lite_few_shots_enabled', - 'PromptUtils', - 'CodeExecutor', + "CombinedToolProvider", + "ToolRegistryProvider", + "DirectLangChainToolsProvider", + "create_mcp_prompt", + "normalize_mcp_few_shot_examples", + "resolve_cuga_lite_few_shots_enabled", + "PromptUtils", + "CodeExecutor", ] diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py index 92cca7bc..55a71e63 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py @@ -19,30 +19,26 @@ import json import os from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import Any, Dict, List, Optional, Tuple -from langchain_core.exceptions import OutputParserException -from langchain_core.language_models import BaseChatModel from langchain_core.messages import AIMessage, BaseMessage, HumanMessage from langchain_core.runnables import 
RunnableConfig -from langchain_core.tools import StructuredTool from langgraph.types import Command from loguru import logger -from pydantic import BaseModel, Field from cuga.backend.activity_tracker.tracker import Step -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( CoreGraphAdapter, append_chat_messages_with_step_limit as _core_append_with_step_limit, create_error_command as _core_create_error_command, execution_output_text, ) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import make_tool_awaitable -from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import make_tool_awaitable +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ( ExecutionRouter, split_execution_note, ) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.runtime_tools import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.tools.runtime_tools import ( build_runtime_tools, resolve_runtime_backends, ) @@ -51,7 +47,6 @@ is_find_tools_listing_markdown, ) from cuga.backend.cuga_graph.nodes.cuga_lite.model_runtime_profile import ( - resolve_bind_tools_fields, resolved_runtime_model_name, ) from cuga.backend.cuga_graph.nodes.cuga_lite.nl_auto_continue_classifier import ( @@ -59,7 +54,6 @@ normalize_assistant_text, ) from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import ( - PromptUtils, create_mcp_prompt, format_apps_for_prompt, normalize_mcp_few_shot_examples, @@ -67,7 +61,6 @@ ) from cuga.backend.cuga_graph.nodes.cuga_lite.reflection.reflection import reflection_task from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface from cuga.backend.cuga_graph.nodes.task_decomposition_planning.analyze_task 
import TaskAnalyzer from cuga.backend.cuga_graph.policy.enactment import PolicyEnactment from cuga.backend.llm.errors import extract_code_from_tool_use_failed @@ -80,542 +73,36 @@ ) from cuga.config import settings -_llm_manager = LLMManager() - - -# ── Helpers (moved from cuga_lite_graph.py) ──────────────────────────────── - -# ── Bind-tools helpers (Task 1) ──────────────────────────────────────────── - - -def _bind_tools_mode_from_settings() -> str: - try: - m = getattr(settings.advanced_features, "cuga_lite_bind_tools_mode", None) - if m is not None and str(m).strip(): - return str(m).strip().lower() - except Exception: - pass - return "none" - - -def _bind_tools_apps_from_settings(): - try: - raw = getattr(settings.advanced_features, "cuga_lite_bind_tools_apps", None) - if raw is None: - return [] - if isinstance(raw, str): - return [raw.strip()] if raw.strip() else [] - if isinstance(raw, (list, tuple)): - return [str(x).strip() for x in raw if str(x).strip()] - except Exception: - pass - return [] - - -def _bind_tools_tool_names_from_settings(): - try: - raw = getattr(settings.advanced_features, "cuga_lite_bind_tools_tool_names", None) - if raw is None: - return [] - if isinstance(raw, str): - return [raw.strip()] if raw.strip() else [] - if isinstance(raw, (list, tuple)): - return [str(x).strip() for x in raw if str(x).strip()] - except Exception: - pass - return [] - - -def _bind_include_find_tools_from_config(cfg: Dict[str, Any]) -> bool: - v = cfg.get("cuga_lite_bind_tools_include_find_tools") - if v is None: - try: - v = getattr(settings.advanced_features, "cuga_lite_bind_tools_include_find_tools", False) - except Exception: - v = False - if isinstance(v, bool): - return v - if isinstance(v, str): - return v.strip().lower() in ("true", "1", "yes", "on") - return bool(v) - - -def _merge_find_tools_into_bound( - bound: List[StructuredTool], - seen: Set[str], - *, - include_find_tools: bool, - tools_context_ref: Optional[Dict[str, Any]], -) -> None: - if 
not include_find_tools: - return - ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if not ft: - return - name = getattr(ft, "name", None) or "" - if name and name not in seen: - seen.add(name) - bound.append(ft) - - -async def _indexed_provider_tools_first_wins( - tool_provider: ToolProviderInterface, -) -> Dict[str, StructuredTool]: - """Map tool name → StructuredTool using provider.get_all_tools (first occurrence wins).""" - try: - all_tools = await tool_provider.get_all_tools() - except Exception as e: - logger.warning("bind_tools: get_all_tools failed: %s", e) - return {} - by_name: Dict[str, StructuredTool] = {} - duplicates: Set[str] = set() - for t in all_tools or []: - n = getattr(t, "name", None) or "" - if not n: - continue - if n in by_name: - duplicates.add(n) - continue - by_name[n] = t - if duplicates: - logger.debug( - "bind_tools: duplicate tool names from provider (using first): %s", - sorted(duplicates), - ) - return by_name - - -async def _indexed_tools_for_native_bind( - tool_provider: ToolProviderInterface, - tools_context_ref: Optional[Dict[str, Any]], -) -> Dict[str, StructuredTool]: - """Registry MCP tools plus in-graph overlays (skills, OpenSandbox shell, todos, find_tools). - - ``run_command`` / ``write_file`` / etc. are not registered with ToolRegistryProvider; prepare - copies them onto ``tools_for_prompt`` only. Overlay must merge so ``cuga_lite_bind_tools_tool_names`` - can bind them by name. 
- """ - by_name = await _indexed_provider_tools_first_wins(tool_provider) - overlay = (tools_context_ref or {}).get("_lc_bind_tools_overlay_structured_tools") or [] - if not overlay: - return by_name - for t in overlay: - n = getattr(t, "name", None) or "" - if not n: - continue - by_name[n] = t - return by_name - - -async def resolve_model_with_bind_tools( - active_model: BaseChatModel, - *, - configurable: Optional[Dict[str, Any]], - tools_context_ref: Optional[Dict[str, Any]], - tool_provider: Optional[ToolProviderInterface], - model_name: Optional[str] = None, -) -> BaseChatModel: - """Optionally wrap ``active_model`` with ``bind_tools`` for native tool-calling tests. - - LangGraph ``config['configurable']`` overrides per-model runtime profile overrides TOML: - - - ``cuga_lite_bind_tools_mode``: ``none`` | ``find_tools`` | ``all`` | ``apps`` | ``tools`` | ``apps_and_tools`` - - ``cuga_lite_bind_tools_apps``: list of app names (``mode=apps`` or ``apps_and_tools``) - - ``cuga_lite_bind_tools_tool_names``: StructuredTool ``name`` values (``mode=tools`` or ``apps_and_tools``) - - ``cuga_lite_bind_tools_include_find_tools``: merge ``find_tools`` into ``all`` / ``apps`` / ``tools`` / ``apps_and_tools`` - - Profile ``gpt-oss-20b``: see ``model_runtime_profile.GPT_OSS_20B_RUNTIME_DEFAULTS``. 
- """ - cfg = configurable or {} - mn = (model_name or "").strip() - if not mn: - mn = resolved_runtime_model_name( - configurable_llm=cfg.get("llm"), - graph_default_model=active_model, - ) - mode, app_names, tool_names, include_find_tools = resolve_bind_tools_fields( - configurable, - mn, - settings_mode_fn=_bind_tools_mode_from_settings, - settings_apps_fn=_bind_tools_apps_from_settings, - settings_tool_names_fn=_bind_tools_tool_names_from_settings, - settings_include_fn=lambda: _bind_include_find_tools_from_config({}), - ) - - if mode in ("", "none", "false", "0", "off"): - if include_find_tools: - ft_only = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if ft_only: - return active_model.bind_tools([ft_only]) - return active_model - - try: - if mode == "find_tools": - ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if ft: - return active_model.bind_tools([ft]) - logger.debug( - "cuga_lite_bind_tools_mode=find_tools but find_tools StructuredTool is missing " - "(shortlisting may be off)" - ) - return active_model - - if mode == "all": - if not tool_provider: - logger.warning("cuga_lite_bind_tools_mode=all but tool_provider is missing") - return active_model - by_name = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) - bound = list(by_name.values()) - seen: Set[str] = {n for n in by_name} - _merge_find_tools_into_bound( - bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref - ) - if not bound: - return active_model - return active_model.bind_tools(bound) - - if mode == "apps_and_tools": - if not tool_provider: - logger.warning("cuga_lite_bind_tools_mode=apps_and_tools but tool_provider is missing") - return active_model - if not app_names and not tool_names: - if include_find_tools: - ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if ft: - return active_model.bind_tools([ft]) - logger.warning( - "cuga_lite_bind_tools_mode=apps_and_tools but 
cuga_lite_bind_tools_apps and " - "cuga_lite_bind_tools_tool_names are both empty " - "(set include_find_tools to bind find_tools only)" - ) - return active_model - - bound: List[StructuredTool] = [] - seen_names: Set[str] = set() - for app_name in app_names: - try: - for t in await tool_provider.get_tools(app_name): - name = getattr(t, "name", None) or "" - if name and name not in seen_names: - seen_names.add(name) - bound.append(t) - except Exception as e: - logger.warning("bind_tools apps_and_tools: get_tools(%s) failed: %s", app_name, e) - - by_name_lookup: Dict[str, StructuredTool] = {} - if tool_names: - by_name_lookup = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) - - missing: List[str] = [] - if tool_names: - for tn in tool_names: - if tn in seen_names: - continue - t = by_name_lookup.get(tn) - if t is None: - missing.append(tn) - continue - seen_names.add(tn) - bound.append(t) - if missing: - logger.warning( - "cuga_lite_bind_tools_tool_names not found among provider tools (skipped): %s", - missing, - ) +# ── Helpers (imported from cuga_lite/helpers/) ───────────────────────────── - _merge_find_tools_into_bound( - bound, seen_names, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref - ) - if not bound: - return active_model - return active_model.bind_tools(bound) - - if mode == "apps": - if not app_names: - if include_find_tools: - ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if ft: - return active_model.bind_tools([ft]) - logger.warning( - "cuga_lite_bind_tools_mode=apps but cuga_lite_bind_tools_apps is empty " - "(set include_find_tools to bind find_tools only)" - ) - return active_model - if not tool_provider: - logger.warning("cuga_lite_bind_tools_mode=apps but tool_provider is missing") - return active_model - - bound = [] - seen: Set[str] = set() - for app_name in app_names: - try: - for t in await tool_provider.get_tools(app_name): - name = getattr(t, "name", None) or "" - if 
name and name not in seen: - seen.add(name) - bound.append(t) - except Exception as e: - logger.warning("bind_tools apps: get_tools(%s) failed: %s", app_name, e) - _merge_find_tools_into_bound( - bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref - ) - if not bound: - return active_model - return active_model.bind_tools(bound) - - if mode == "tools": - if not tool_names: - if include_find_tools: - ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") - if ft: - return active_model.bind_tools([ft]) - logger.warning( - "cuga_lite_bind_tools_mode=tools but cuga_lite_bind_tools_tool_names is empty " - "(set include_find_tools to bind find_tools only)" - ) - return active_model - if not tool_provider: - logger.warning("cuga_lite_bind_tools_mode=tools but tool_provider is missing") - return active_model - by_name = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) - if not by_name: - return active_model - bound = [] - seen: Set[str] = set() - missing: List[str] = [] - for tn in tool_names: - t = by_name.get(tn) - if t is None: - missing.append(tn) - continue - if tn not in seen: - seen.add(tn) - bound.append(t) - if missing: - logger.warning( - "cuga_lite_bind_tools_tool_names not found among provider tools (skipped): %s", - missing, - ) - _merge_find_tools_into_bound( - bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref - ) - if not bound: - return active_model - return active_model.bind_tools(bound) - - logger.warning( - "Unknown cuga_lite_bind_tools_mode: %s (use none|find_tools|all|apps|tools|apps_and_tools)", - mode, - ) - except Exception as e: - logger.warning("resolve_model_with_bind_tools failed: %s", e) - return active_model - - -# ── Find-tools helpers (Task 2) ──────────────────────────────────────────── - -_BUNDLED_FIND_TOOLS_FEW_SHOT_JSON = ( - Path(__file__).resolve().parent / "prompts" / "find_tools_few_shot_examples.json" +from 
cuga.backend.cuga_graph.nodes.cuga_lite.helpers.bind_tools import ( + resolve_model_with_bind_tools, +) +from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.find_tools import ( + _first_user_message_text, + create_find_tools_tool, + _load_default_find_tools_few_shot_examples, + _ensure_web_app, + _web_search_enabled, +) +from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.knowledge import ( + _get_knowledge_tool_scope_context, + _knowledge_scope_instruction, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.todos import ( + create_update_todos_tool, + extract_task_todos_from_new_vars, + format_current_plan_section, + format_task_todos_system_block, ) - -def _first_user_message_text(chat_messages: Optional[List[BaseMessage]]) -> Optional[str]: - if not chat_messages: - return None - for msg in chat_messages: - if isinstance(msg, HumanMessage): - raw = msg.content - text = raw.strip() if isinstance(raw, str) else str(raw).strip() - return text or None - return None - - -def _compose_find_tools_shortlister_query(query: str, initial_user_message: Optional[str]) -> str: - q = query.strip() - init = (initial_user_message or "").strip() - if not init: - return q - return f"Query: {q}\nTask context (initial user message): {init}" - - -def _web_search_enabled() -> bool: - return bool(getattr(settings.advanced_features, "enable_web_search", False)) - - -def _ensure_web_app(apps: List[Any], all_apps: List[Any]) -> List[Any]: - if not _web_search_enabled() or any(getattr(app, "name", None) == "web" for app in apps): - return apps - web_app = next((app for app in all_apps if getattr(app, "name", None) == "web"), None) - if web_app: - return [*apps, web_app] - return apps - - -async def create_find_tools_tool( - all_tools, - all_apps: List[Any], - app_to_tools_map: Optional[Dict[str, List[StructuredTool]]] = None, - llm: Optional[Any] = None, - initial_user_message: Optional[str] = None, -) -> StructuredTool: - """Create a find_tools StructuredTool for tool 
discovery. - - Args: - all_tools: All available tools to search through - all_apps: All available app definitions - app_to_tools_map: Optional mapping of app_name -> list of tools. If provided, used for filtering by app_name. - initial_user_message: First human message in the session; combined with the tool `query` for shortlisting. - - Returns: - StructuredTool configured for finding relevant tools - """ - - async def find_tools_func(query: str, app_name: str): - """Search for relevant tools from the connected applications based on a natural language query. - - Args: - query: Natural language query describing what tools are needed to accomplish the task can include also which parameters are needed or the output expected - app_name: Name of a specific app to filter tools from. Only searches tools from that app. - - Returns: - Top 4 matching tools with their details - """ - if app_to_tools_map and app_name in app_to_tools_map: - filtered_tools = app_to_tools_map[app_name] - else: - logger.warning( - f"App '{app_name}' not found in app_to_tools_map. Available apps: {list(app_to_tools_map.keys()) if app_to_tools_map else 'N/A'}" - ) - filtered_tools = [] - - filtered_apps = [app for app in all_apps if hasattr(app, 'name') and app.name == app_name] - - if not filtered_apps: - logger.warning( - f"App '{app_name}' not found in available apps. Available apps: {[app.name if hasattr(app, 'name') else str(app) for app in all_apps]}" - ) - - shortlister_query = _compose_find_tools_shortlister_query(query, initial_user_message) - - try: - return await PromptUtils.find_tools( - query=shortlister_query, all_tools=filtered_tools, all_apps=filtered_apps, llm=llm - ) - except OutputParserException as e: - logger.bind( - query_len=len(shortlister_query), - error_type=type(e).__name__, - ).opt(exception=True).warning( - "Tool shortlisting failed due to parser error; returning error to agent" - ) - return ( - f"Tool shortlisting failed due to malformed response: {e}. 
" - "Please retry with a different query." - ) - except Exception as e: - logger.bind( - query_len=len(shortlister_query), - error_type=type(e).__name__, - ).opt(exception=True).warning("Tool shortlisting failed unexpectedly; returning error to agent") - return ( - f"Tool shortlisting failed due to an internal error: {e}. " - "Please retry with a different query." - ) - - return StructuredTool.from_function( - func=find_tools_func, - name="find_tools", - description="Search for relevant tools from a specific connected application based on a natural language query. Use this when you need to discover what tools are available for a specific task within a specific application.", - ) - - -def _resolve_find_tools_few_shot_json_path() -> Optional[Path]: - if _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON.is_file(): - return _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON - return None - - -def _load_default_find_tools_few_shot_examples() -> List[Dict[str, str]]: - from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import normalize_mcp_few_shot_examples - - path = _resolve_find_tools_few_shot_json_path() - if path is None: - logger.debug( - "Find-tools few-shot JSON not found (expected packaged %s or repo samples copy); skipping", - _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON, - ) - return [] - try: - import json as _json - - raw = _json.loads(path.read_text(encoding="utf-8")) - normalized = normalize_mcp_few_shot_examples(raw) - if normalized: - logger.info(f"Loaded {len(normalized)} find_tools MCP few-shot turn(s) from {path}") - return normalized - except (OSError, _json.JSONDecodeError) as e: - logger.warning(f"Could not load find_tools few-shot JSON from {path}: {e}") - return [] - - -# ── Knowledge helpers (Task 4b) ─────────────────────────────────────────── - - -def _get_knowledge_tool_scope_context( - engine: Any | None, - thread_id: str | None, -) -> tuple[tuple[str, ...], str | None]: - config = getattr(engine, "_config", None) if engine else None - if not config or not getattr(config, 
"enabled", False): - return (), None - - scopes: list[str] = [] - if getattr(config, "agent_level_enabled", True): - scopes.append("agent") - if getattr(config, "session_level_enabled", True) and thread_id: - scopes.append("session") - - default_scope = "agent" if "agent" in scopes else scopes[0] if scopes else None - return tuple(scopes), default_scope - - -def _knowledge_scope_instruction(allowed_scopes: tuple[str, ...], thread_id: str | None) -> str: - if allowed_scopes == ("agent",): - return ( - "Knowledge scope rules for this run: only agent-level knowledge is available. " - "Never call `knowledge_*` tools with `scope=\"session\"`." - ) - if allowed_scopes == ("session",): - return ( - "Knowledge scope rules for this run: only session-level knowledge is available. " - "Never call `knowledge_*` tools with `scope=\"agent\"`. The conversation thread context is injected automatically." - ) - if allowed_scopes == ("agent", "session"): - return ( - "Knowledge scope rules for this run: both knowledge scopes are available. " - "Use `scope=\"agent\"` for permanent agent documents and `scope=\"session\"` for this conversation's documents." - ) - if thread_id: - return "Knowledge tools are unavailable in this run. Do not call any `knowledge_*` tool." - return ( - "Knowledge tools are unavailable in this run. " - "Session scope cannot be used here because there is no conversation thread context." - ) - - -def _decorate_knowledge_tool(tool: Any, allowed_scopes: tuple[str, ...], thread_id: str | None) -> None: - """Add a brief scope hint to the tool description. - - The full scope rules are already in the system instructions, so we only - add a short reminder here to avoid bloating the prompt with repeated text. - """ - base_description = getattr(tool, "description", "") or "Knowledge tool" - scopes_str = ", ".join(f'"{s}"' for s in allowed_scopes) - hint = f"Allowed scopes: {scopes_str}. See knowledge scope rules in instructions above." 
- tool.description = f"{base_description}\n\n{hint}".strip() +_llm_manager = LLMManager() -# ── Reflection helper (Task 5a) ──────────────────────────────────────────── +def _clean_empty_response_retry_meta(meta: Optional[Dict[str, Any]]) -> Dict[str, Any]: + m = {**(meta or {})} + m.pop("_empty_response_correction", None) + return m def _reflection_current_task(state: Any) -> str: @@ -632,173 +119,6 @@ def _reflection_current_task(state: Any) -> str: return "" -# ── Todos helpers (Task 3) ───────────────────────────────────────────────── - - -class Todo(BaseModel): - """A single todo item with text and status.""" - - text: str = Field(..., description="The task description") - status: str = Field( - default="pending", - description="Status of the todo: 'pending', 'in_progress', or 'completed'", - ) - - -class TodosInput(BaseModel): - """Input schema for create_update_todos function.""" - - todos: List[Todo] = Field(..., description="List of todos, each with 'text' and 'status' fields") - - -class TodosOutput(BaseModel): - """Output schema for create_update_todos function.""" - - todos: List[Todo] = Field(..., description="List of todos with their current status") - - -def _try_parse_todos_payload(value: Any) -> Optional[List[Dict[str, Any]]]: - if not isinstance(value, dict) or "todos" not in value: - return None - raw = value["todos"] - if not isinstance(raw, list): - return None - if not raw: - return [] - if not all(isinstance(x, dict) and "text" in x and "status" in x for x in raw): - return None - return raw - - -def extract_task_todos_from_new_vars(new_vars: dict) -> Optional[List[Dict[str, Any]]]: - for val in new_vars.values(): - parsed = _try_parse_todos_payload(val) - if parsed is not None: - return parsed - return None - - -def _serialize_todos_for_store(todos_list: List[Any]) -> List[Dict[str, str]]: - out: List[Dict[str, str]] = [] - for t in todos_list: - if isinstance(t, Todo): - out.append({"text": t.text, "status": t.status}) - elif hasattr(t, 
"model_dump"): - d = t.model_dump() - out.append({"text": str(d.get("text", "")), "status": str(d.get("status", "pending"))}) - elif isinstance(t, dict): - out.append({"text": str(t.get("text", "")), "status": str(t.get("status", "pending"))}) - else: - out.append({"text": str(t), "status": "pending"}) - return out - - -async def create_update_todos_tool( - agent_state: Optional[Any] = None, - todos_store_ref: Optional[List[Dict[str, str]]] = None, -) -> StructuredTool: - """Create a create_update_todos StructuredTool for managing task todos. - - Args: - agent_state: Optional AgentState (reserved for future use) - todos_store_ref: Mutable list shared with the graph; latest todos are written here for the system prompt. - - Returns: - StructuredTool configured for creating and updating todos - """ - - async def create_update_todos_func(todos: Any) -> TodosOutput: - """Create or update a list of todos for complex multi-step tasks. - - Use this tool when you have a complex task that requires multiple steps. - This helps you track progress and organize your work. - - Args: - todos: List of todo dicts/models (matches ``TodosInput.todos`` / tool schema). - - Returns: - Short confirmation only (full list is shown in the system prompt via todos_store_ref). 
- """ - input_data = todos - # Handle different input types - if isinstance(input_data, TodosInput): - todos_list = input_data.todos - elif isinstance(input_data, dict): - # If it's a dict, check if it has 'todos' key - if 'todos' in input_data: - todos_list = input_data['todos'] - else: - # If no 'todos' key, treat the whole dict as a single todo or wrap it - todos_list = [input_data] - # Convert dict items to Todo models - todos_list = [Todo(**todo) if isinstance(todo, dict) else todo for todo in todos_list] - elif isinstance(input_data, list): - # If it's a list directly, convert each item to Todo - todos_list = [Todo(**todo) if isinstance(todo, dict) else todo for todo in input_data] - else: - # Fallback: try to create TodosInput - try: - if isinstance(input_data, dict): - input_data = TodosInput(**input_data) - else: - input_data = TodosInput(todos=input_data) - todos_list = input_data.todos - except Exception: - # Last resort: wrap in a list - todos_list = [Todo(**input_data) if isinstance(input_data, dict) else input_data] - - if todos_store_ref is not None: - serialized = _serialize_todos_for_store(todos_list) - todos_store_ref.clear() - todos_store_ref.extend(serialized) - - normalized = [t if isinstance(t, Todo) else Todo(**t) for t in todos_list] - return TodosOutput(todos=normalized) - - return StructuredTool.from_function( - func=create_update_todos_func, - name="create_update_todos", - description="Create or update a list of todos for complex multi-step tasks. Pass `todos` as a list of objects with 'text' and 'status' ('pending', 'in_progress', or 'completed'). 
Returns a todos payload; the full list is shown in the system prompt under 'Current task todos' (Current Plan).", - args_schema=TodosInput, - return_direct=False, - ) - - -def _clean_empty_response_retry_meta(meta: Optional[Dict[str, Any]]) -> Dict[str, Any]: - m = {**(meta or {})} - m.pop("_empty_response_correction", None) - return m - - -def format_task_todos_system_block(todos: List[Dict[str, str]]) -> str: - if not todos: - return "" - lines = [ - "", - "---", - "", - "## Current task todos", - "", - "Execution only prints **Todos updated** after each change; use this list as the source of truth.", - "", - ] - for i, item in enumerate(todos, start=1): - status = item.get("status", "pending") - text = item.get("text", "") - lines.append(f"{i}. **[{status}]** {text}") - lines.append("") - return "\n".join(lines) - - -def format_current_plan_section(task_todos: List[Dict[str, Any]]) -> str: - lines = ["## Current Plan", ""] - for item in task_todos: - text = str(item.get("text", "")).strip() - status = str(item.get("status", "pending")).strip() - lines.append(f"- **[{status}]** {text}") - return "\n".join(lines) + "\n" - - def _tool_call_kwarg_literal(value: Any) -> str: if isinstance(value, str): return json.dumps(value, ensure_ascii=False) @@ -1583,7 +903,7 @@ def build_sandbox_node(self, base_thread_id: Any, base_apps_list: Any): async def sandbox(state: Any, config: Optional[RunnableConfig] = None): """Execute code in sandbox and return results.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import ToolCallTracker + from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import ToolCallTracker # Check if user denied approval (only if policies are enabled) if settings.policy.enabled: diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py index 2554e2f7..511e86bb 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py +++ 
b/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py @@ -12,20 +12,20 @@ from langchain_core.messages import AIMessage, BaseMessage from langgraph.graph import StateGraph from langgraph.types import Command -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from cuga.backend.activity_tracker.tracker import ActivityTracker -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( CoreGraphAdapter, append_chat_messages_with_step_limit as _core_append_with_step_limit, create_error_command as _core_create_error_command, ) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_graph import build_agent_graph -from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_graph import build_agent_graph +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_nodes import ( create_call_model_node as _create_shared_call_model_node, ) from cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter import AgentGraphAdapter -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface from cuga.backend.cuga_graph.state.agent_state import AgentState from cuga.backend.llm.models import LLMManager from cuga.backend.llm.utils.helpers import load_one_prompt @@ -105,8 +105,7 @@ class CugaLiteState(BaseModel): default_factory=list ) # List of tracked tool calls (when track_tool_calls is enabled) - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) @property def variables_manager(self): diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/code_executor.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/code_executor.py index 8bb68500..71d16bf9 100644 --- 
a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/code_executor.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/code_executor.py @@ -13,7 +13,7 @@ from .opensandbox import OpenSandboxExecutor from .native import NativeSandboxExecutor from .base_executor import BaseExecutor, RemoteExecutor -from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ExecutionPlan +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ExecutionPlan def _skills_enabled() -> bool: diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/common/call_api_helper.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/common/call_api_helper.py index bf7235eb..3fe22e2f 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/common/call_api_helper.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/common/call_api_helper.py @@ -47,7 +47,7 @@ def create_local_call_api_function() -> Callable: import aiohttp from cuga.backend.tools_env.registry.utils.api_utils import get_registry_base_url from cuga.backend.activity_tracker.tracker import ActivityTracker - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import ToolCallTracker + from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import ToolCallTracker tracker = ActivityTracker() diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_execution_plan_wiring.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_execution_plan_wiring.py index ffe62594..9dbdab49 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_execution_plan_wiring.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_execution_plan_wiring.py @@ -16,7 +16,7 @@ from cuga.backend.cuga_graph.state.agent_state import AgentState, VariablesManager from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor -from 
cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ExecutionPlan +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ExecutionPlan def _plan(python_backend: str) -> ExecutionPlan: diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_extract_codeblocks.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_extract_codeblocks.py index db89e287..c9a089be 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_extract_codeblocks.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_extract_codeblocks.py @@ -1,4 +1,6 @@ -from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import extract_and_combine_codeblocks +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import ( + extract_and_combine_codeblocks, +) class TestExtractAndCombineCodeblocks: diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_sync_async_tools.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_sync_async_tools.py index d85b8248..94254d03 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_sync_async_tools.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_sync_async_tools.py @@ -11,7 +11,7 @@ from cuga.backend.cuga_graph.state.agent_state import AgentState, VariablesManager from cuga.backend.cuga_graph.nodes.cuga_lite.executors.code_executor import CodeExecutor -from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import make_tool_awaitable +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import make_tool_awaitable @pytest.fixture diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_tool_call_timeout.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_tool_call_timeout.py index aae3a017..9522b323 100644 --- 
a/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_tool_call_timeout.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/executors/tests/test_tool_call_timeout.py @@ -8,7 +8,7 @@ from cuga.backend.cuga_graph.state.agent_state import AgentState, VariablesManager from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_registry_provider import call_api +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.registry import call_api @pytest.fixture @@ -81,7 +81,7 @@ async def test_call_api_timeout(): # Mock the HTTP call to simulate a timeout with patch( - 'cuga.backend.cuga_graph.nodes.cuga_lite.tool_registry_provider.aiohttp.ClientSession' + 'cuga.backend.cuga_graph.nodes.cuga_lite.providers.registry.aiohttp.ClientSession' ) as mock_session_class: # Create a mock post context manager that raises TimeoutError async def timeout_post_context(*args, **kwargs): diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/__init__.py new file mode 100644 index 00000000..8f0407f1 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/__init__.py @@ -0,0 +1,4 @@ +"""CugaLite Graph Helpers package. + +Contains extracted helper modules to keep the core agent graph adapter and node clean and focused. 
+""" diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/bind_tools.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/bind_tools.py new file mode 100644 index 00000000..a1c27792 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/bind_tools.py @@ -0,0 +1,340 @@ +"""Model and tool binding helper functions for CugaLite.""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Set + +from langchain_core.language_models import BaseChatModel +from langchain_core.tools import StructuredTool +from loguru import logger + +from cuga.config import settings +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.model_runtime_profile import ( + resolve_bind_tools_fields, + resolved_runtime_model_name, +) + + +def _bind_tools_mode_from_settings() -> str: + try: + m = getattr(settings.advanced_features, "cuga_lite_bind_tools_mode", None) + if m is not None and str(m).strip(): + return str(m).strip().lower() + except Exception: + pass + return "none" + + +def _bind_tools_apps_from_settings(): + try: + raw = getattr(settings.advanced_features, "cuga_lite_bind_tools_apps", None) + if raw is None: + return [] + if isinstance(raw, str): + return [raw.strip()] if raw.strip() else [] + if isinstance(raw, (list, tuple)): + return [str(x).strip() for x in raw if str(x).strip()] + except Exception: + pass + return [] + + +def _bind_tools_tool_names_from_settings(): + try: + raw = getattr(settings.advanced_features, "cuga_lite_bind_tools_tool_names", None) + if raw is None: + return [] + if isinstance(raw, str): + return [raw.strip()] if raw.strip() else [] + if isinstance(raw, (list, tuple)): + return [str(x).strip() for x in raw if str(x).strip()] + except Exception: + pass + return [] + + +def _bind_include_find_tools_from_config(cfg: Dict[str, Any]) -> bool: + v = cfg.get("cuga_lite_bind_tools_include_find_tools") + if v is None: + 
try: + v = getattr(settings.advanced_features, "cuga_lite_bind_tools_include_find_tools", False) + except Exception: + v = False + if isinstance(v, bool): + return v + if isinstance(v, str): + return v.strip().lower() in ("true", "1", "yes", "on") + return bool(v) + + +def _merge_find_tools_into_bound( + bound: List[StructuredTool], + seen: Set[str], + *, + include_find_tools: bool, + tools_context_ref: Optional[Dict[str, Any]], +) -> None: + if not include_find_tools: + return + ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if not ft: + return + name = getattr(ft, "name", None) or "" + if name and name not in seen: + seen.add(name) + bound.append(ft) + + +async def _indexed_provider_tools_first_wins( + tool_provider: ToolProviderInterface, +) -> Dict[str, StructuredTool]: + """Map tool name → StructuredTool using provider.get_all_tools (first occurrence wins).""" + try: + all_tools = await tool_provider.get_all_tools() + except Exception as e: + logger.warning("bind_tools: get_all_tools failed: %s", e) + return {} + by_name: Dict[str, StructuredTool] = {} + duplicates: Set[str] = set() + for t in all_tools or []: + n = getattr(t, "name", None) or "" + if not n: + continue + if n in by_name: + duplicates.add(n) + continue + by_name[n] = t + if duplicates: + logger.debug( + "bind_tools: duplicate tool names from provider (using first): %s", + sorted(duplicates), + ) + return by_name + + +async def _indexed_tools_for_native_bind( + tool_provider: ToolProviderInterface, + tools_context_ref: Optional[Dict[str, Any]], +) -> Dict[str, StructuredTool]: + """Registry MCP tools plus in-graph overlays (skills, OpenSandbox shell, todos, find_tools). + + ``run_command`` / ``write_file`` / etc. are not registered with ToolRegistryProvider; prepare + copies them onto ``tools_for_prompt`` only. Overlay must merge so ``cuga_lite_bind_tools_tool_names`` + can bind them by name. 
+ """ + by_name = await _indexed_provider_tools_first_wins(tool_provider) + overlay = (tools_context_ref or {}).get("_lc_bind_tools_overlay_structured_tools") or [] + if not overlay: + return by_name + for t in overlay: + n = getattr(t, "name", None) or "" + if not n: + continue + by_name[n] = t + return by_name + + +async def resolve_model_with_bind_tools( + active_model: BaseChatModel, + *, + configurable: Optional[Dict[str, Any]], + tools_context_ref: Optional[Dict[str, Any]], + tool_provider: Optional[ToolProviderInterface], + model_name: Optional[str] = None, +) -> BaseChatModel: + """Optionally wrap ``active_model`` with ``bind_tools`` for native tool-calling tests. + + LangGraph ``config['configurable']`` overrides per-model runtime profile overrides TOML: + + - ``cuga_lite_bind_tools_mode``: ``none`` | ``find_tools`` | ``all`` | ``apps`` | ``tools`` | ``apps_and_tools`` + - ``cuga_lite_bind_tools_apps``: list of app names (``mode=apps`` or ``apps_and_tools``) + - ``cuga_lite_bind_tools_tool_names``: StructuredTool ``name`` values (``mode=tools`` or ``apps_and_tools``) + - ``cuga_lite_bind_tools_include_find_tools``: merge ``find_tools`` into ``all`` / ``apps`` / ``tools`` / ``apps_and_tools`` + + Profile ``gpt-oss-20b``: see ``model_runtime_profile.GPT_OSS_20B_RUNTIME_DEFAULTS``. 
+ """ + cfg = configurable or {} + mn = (model_name or "").strip() + if not mn: + mn = resolved_runtime_model_name( + configurable_llm=cfg.get("llm"), + graph_default_model=active_model, + ) + mode, app_names, tool_names, include_find_tools = resolve_bind_tools_fields( + configurable, + mn, + settings_mode_fn=_bind_tools_mode_from_settings, + settings_apps_fn=_bind_tools_apps_from_settings, + settings_tool_names_fn=_bind_tools_tool_names_from_settings, + settings_include_fn=lambda: _bind_include_find_tools_from_config({}), + ) + + if mode in ("", "none", "false", "0", "off"): + if include_find_tools: + ft_only = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if ft_only: + return active_model.bind_tools([ft_only]) + return active_model + + try: + if mode == "find_tools": + ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if ft: + return active_model.bind_tools([ft]) + logger.debug( + "cuga_lite_bind_tools_mode=find_tools but find_tools StructuredTool is missing " + "(shortlisting may be off)" + ) + return active_model + + if mode == "all": + if not tool_provider: + logger.warning("cuga_lite_bind_tools_mode=all but tool_provider is missing") + return active_model + by_name = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) + bound = list(by_name.values()) + seen: Set[str] = {n for n in by_name} + _merge_find_tools_into_bound( + bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref + ) + if not bound: + return active_model + return active_model.bind_tools(bound) + + if mode == "apps_and_tools": + if not tool_provider: + logger.warning("cuga_lite_bind_tools_mode=apps_and_tools but tool_provider is missing") + return active_model + if not app_names and not tool_names: + if include_find_tools: + ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if ft: + return active_model.bind_tools([ft]) + logger.warning( + "cuga_lite_bind_tools_mode=apps_and_tools but 
cuga_lite_bind_tools_apps and " + "cuga_lite_bind_tools_tool_names are both empty " + "(set include_find_tools to bind find_tools only)" + ) + return active_model + + bound: List[StructuredTool] = [] + seen_names: Set[str] = set() + for app_name in app_names: + try: + for t in await tool_provider.get_tools(app_name): + name = getattr(t, "name", None) or "" + if name and name not in seen_names: + seen_names.add(name) + bound.append(t) + except Exception as e: + logger.warning("bind_tools apps_and_tools: get_tools(%s) failed: %s", app_name, e) + + by_name_lookup: Dict[str, StructuredTool] = {} + if tool_names: + by_name_lookup = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) + + missing: List[str] = [] + if tool_names: + for tn in tool_names: + if tn in seen_names: + continue + t = by_name_lookup.get(tn) + if t is None: + missing.append(tn) + continue + seen_names.add(tn) + bound.append(t) + if missing: + logger.warning( + "cuga_lite_bind_tools_tool_names not found among provider tools (skipped): %s", + missing, + ) + + _merge_find_tools_into_bound( + bound, seen_names, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref + ) + if not bound: + return active_model + return active_model.bind_tools(bound) + + if mode == "apps": + if not app_names: + if include_find_tools: + ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if ft: + return active_model.bind_tools([ft]) + logger.warning( + "cuga_lite_bind_tools_mode=apps but cuga_lite_bind_tools_apps is empty " + "(set include_find_tools to bind find_tools only)" + ) + return active_model + if not tool_provider: + logger.warning("cuga_lite_bind_tools_mode=apps but tool_provider is missing") + return active_model + + bound = [] + seen: Set[str] = set() + for app_name in app_names: + try: + for t in await tool_provider.get_tools(app_name): + name = getattr(t, "name", None) or "" + if name and name not in seen: + seen.add(name) + bound.append(t) + except Exception 
as e: + logger.warning("bind_tools apps: get_tools(%s) failed: %s", app_name, e) + _merge_find_tools_into_bound( + bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref + ) + if not bound: + return active_model + return active_model.bind_tools(bound) + + if mode == "tools": + if not tool_names: + if include_find_tools: + ft = (tools_context_ref or {}).get("_lc_bind_tools_find_tools") + if ft: + return active_model.bind_tools([ft]) + logger.warning( + "cuga_lite_bind_tools_mode=tools but cuga_lite_bind_tools_tool_names is empty " + "(set include_find_tools to bind find_tools only)" + ) + return active_model + if not tool_provider: + logger.warning("cuga_lite_bind_tools_mode=tools but tool_provider is missing") + return active_model + by_name = await _indexed_tools_for_native_bind(tool_provider, tools_context_ref) + if not by_name: + return active_model + bound = [] + seen: Set[str] = set() + missing: List[str] = [] + for tn in tool_names: + t = by_name.get(tn) + if t is None: + missing.append(tn) + continue + if tn not in seen: + seen.add(tn) + bound.append(t) + if missing: + logger.warning( + "cuga_lite_bind_tools_tool_names not found among provider tools (skipped): %s", + missing, + ) + _merge_find_tools_into_bound( + bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref + ) + if not bound: + return active_model + return active_model.bind_tools(bound) + + logger.warning( + "Unknown cuga_lite_bind_tools_mode: %s (use none|find_tools|all|apps|tools|apps_and_tools)", + mode, + ) + except Exception as e: + logger.warning("resolve_model_with_bind_tools failed: %s", e) + return active_model diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/find_tools.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/find_tools.py new file mode 100644 index 00000000..50aa27b1 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/find_tools.py @@ -0,0 +1,156 @@ +"""Helper functions 
for natural language tool shortlisting (find_tools).""" + +from __future__ import annotations + +import json as _json +from pathlib import Path +from typing import Any, Dict, List, Optional + +from langchain_core.exceptions import OutputParserException +from langchain_core.messages import BaseMessage, HumanMessage +from langchain_core.tools import StructuredTool +from loguru import logger + +from cuga.config import settings +from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import PromptUtils + +_BUNDLED_FIND_TOOLS_FEW_SHOT_JSON = ( + Path(__file__).resolve().parent.parent / "prompts" / "find_tools_few_shot_examples.json" +) + + +def _first_user_message_text(chat_messages: Optional[List[BaseMessage]]) -> Optional[str]: + if not chat_messages: + return None + for msg in chat_messages: + if isinstance(msg, HumanMessage): + raw = msg.content + text = raw.strip() if isinstance(raw, str) else str(raw).strip() + return text or None + return None + + +def _compose_find_tools_shortlister_query(query: str, initial_user_message: Optional[str]) -> str: + q = query.strip() + init = (initial_user_message or "").strip() + if not init: + return q + return f"Query: {q}\nTask context (initial user message): {init}" + + +def _web_search_enabled() -> bool: + return bool(getattr(settings.advanced_features, "enable_web_search", False)) + + +def _ensure_web_app(apps: List[Any], all_apps: List[Any]) -> List[Any]: + if not _web_search_enabled() or any(getattr(app, "name", None) == "web" for app in apps): + return apps + web_app = next((app for app in all_apps if getattr(app, "name", None) == "web"), None) + if web_app: + return [*apps, web_app] + return apps + + +async def create_find_tools_tool( + all_tools, + all_apps: List[Any], + app_to_tools_map: Optional[Dict[str, List[StructuredTool]]] = None, + llm: Optional[Any] = None, + initial_user_message: Optional[str] = None, +) -> StructuredTool: + """Create a find_tools StructuredTool for tool discovery. 
+ + Args: + all_tools: All available tools to search through + all_apps: All available app definitions + app_to_tools_map: Optional mapping of app_name -> list of tools. If provided, used for filtering by app_name. + initial_user_message: First human message in the session; combined with the tool `query` for shortlisting. + + Returns: + StructuredTool configured for finding relevant tools + """ + + async def find_tools_func(query: str, app_name: str): + """Search for relevant tools from the connected applications based on a natural language query. + + Args: + query: Natural language query describing what tools are needed to accomplish the task can include also which parameters are needed or the output expected + app_name: Name of a specific app to filter tools from. Only searches tools from that app. + + Returns: + Top 4 matching tools with their details + """ + if app_to_tools_map and app_name in app_to_tools_map: + filtered_tools = app_to_tools_map[app_name] + else: + logger.warning( + f"App '{app_name}' not found in app_to_tools_map. Available apps: {list(app_to_tools_map.keys()) if app_to_tools_map else 'N/A'}" + ) + filtered_tools = [] + + filtered_apps = [app for app in all_apps if hasattr(app, 'name') and app.name == app_name] + + if not filtered_apps: + logger.warning( + f"App '{app_name}' not found in available apps. Available apps: {[app.name if hasattr(app, 'name') else str(app) for app in all_apps]}" + ) + + shortlister_query = _compose_find_tools_shortlister_query(query, initial_user_message) + + try: + return await PromptUtils.find_tools( + query=shortlister_query, all_tools=filtered_tools, all_apps=filtered_apps, llm=llm + ) + except OutputParserException as e: + logger.bind( + query_len=len(shortlister_query), + error_type=type(e).__name__, + ).opt(exception=True).warning( + "Tool shortlisting failed due to parser error; returning error to agent" + ) + return ( + f"Tool shortlisting failed due to malformed response: {e}. 
" + "Please retry with a different query." + ) + except Exception as e: + logger.bind( + query_len=len(shortlister_query), + error_type=type(e).__name__, + ).opt(exception=True).warning("Tool shortlisting failed unexpectedly; returning error to agent") + return ( + f"Tool shortlisting failed due to an internal error: {e}. " + "Please retry with a different query." + ) + + return StructuredTool.from_function( + func=find_tools_func, + name="find_tools", + description="Search for relevant tools from a specific connected application based on a natural language query. Use this when you need to discover what tools are available for a specific task within a specific application.", + ) + + +def _resolve_find_tools_few_shot_json_path() -> Optional[Path]: + if _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON.is_file(): + return _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON + return None + + +def _load_default_find_tools_few_shot_examples() -> List[Dict[str, str]]: + from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import normalize_mcp_few_shot_examples + + path = _resolve_find_tools_few_shot_json_path() + if path is None: + logger.debug( + "Find-tools few-shot JSON not found (expected packaged %s or repo samples copy); skipping", + _BUNDLED_FIND_TOOLS_FEW_SHOT_JSON, + ) + return [] + try: + raw = _json.loads(path.read_text(encoding="utf-8")) + normalized = normalize_mcp_few_shot_examples(raw) + if normalized: + logger.info(f"Loaded {len(normalized)} find_tools MCP few-shot turn(s) from {path}") + return normalized + except (OSError, _json.JSONDecodeError) as e: + logger.warning(f"Could not load find_tools few-shot JSON from {path}: {e}") + return [] diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/knowledge.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/knowledge.py new file mode 100644 index 00000000..99608bc8 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/knowledge.py @@ -0,0 +1,59 @@ +"""Knowledge scoping helper functions for CugaLite.""" 
+ +from __future__ import annotations + +from typing import Any + + +def _get_knowledge_tool_scope_context( + engine: Any | None, + thread_id: str | None, +) -> tuple[tuple[str, ...], str | None]: + config = getattr(engine, "_config", None) if engine else None + if not config or not getattr(config, "enabled", False): + return (), None + + scopes: list[str] = [] + if getattr(config, "agent_level_enabled", True): + scopes.append("agent") + if getattr(config, "session_level_enabled", True) and thread_id: + scopes.append("session") + + default_scope = "agent" if "agent" in scopes else scopes[0] if scopes else None + return tuple(scopes), default_scope + + +def _knowledge_scope_instruction(allowed_scopes: tuple[str, ...], thread_id: str | None) -> str: + if allowed_scopes == ("agent",): + return ( + "Knowledge scope rules for this run: only agent-level knowledge is available. " + "Never call `knowledge_*` tools with `scope=\"session\"`." + ) + if allowed_scopes == ("session",): + return ( + "Knowledge scope rules for this run: only session-level knowledge is available. " + "Never call `knowledge_*` tools with `scope=\"agent\"`. The conversation thread context is injected automatically." + ) + if allowed_scopes == ("agent", "session"): + return ( + "Knowledge scope rules for this run: both knowledge scopes are available. " + "Use `scope=\"agent\"` for permanent agent documents and `scope=\"session\"` for this conversation's documents." + ) + if thread_id: + return "Knowledge tools are unavailable in this run. Do not call any `knowledge_*` tool." + return ( + "Knowledge tools are unavailable in this run. " + "Session scope cannot be used here because there is no conversation thread context." + ) + + +def _decorate_knowledge_tool(tool: Any, allowed_scopes: tuple[str, ...], thread_id: str | None) -> None: + """Add a brief scope hint to the tool description. 
+ + The full scope rules are already in the system instructions, so we only + add a short reminder here to avoid bloating the prompt with repeated text. + """ + base_description = getattr(tool, "description", "") or "Knowledge tool" + scopes_str = ", ".join(f'"{s}"' for s in allowed_scopes) + hint = f"Allowed scopes: {scopes_str}. See knowledge scope rules in instructions above." + tool.description = f"{base_description}\n\n{hint}".strip() diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/prompt_utils.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/prompt_utils.py index 71563c54..a25fcc41 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/prompt_utils.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/prompt_utils.py @@ -11,7 +11,7 @@ from loguru import logger from pydantic import BaseModel, Field from langchain_core.tools import StructuredTool -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import AppDefinition +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import AppDefinition from cuga.backend.llm.utils.helpers import create_chat_prompt_from_templates from cuga.backend.cuga_graph.nodes.cuga_lite.model_runtime_profile import runtime_defaults_for_model @@ -105,7 +105,10 @@ def get_tool_params_str(tool: StructuredTool) -> str: """ if hasattr(tool, 'args_schema') and tool.args_schema: try: - schema = tool.args_schema.schema() + if hasattr(tool.args_schema, 'model_json_schema'): + schema = tool.args_schema.model_json_schema() + else: + schema = tool.args_schema.schema() properties = schema.get('properties', {}) required = schema.get('required', []) @@ -172,7 +175,10 @@ def get_tool_docs(tool: StructuredTool) -> tuple[str, str]: if hasattr(tool, 'args_schema') and tool.args_schema: try: - schema = tool.args_schema.schema() + if hasattr(tool.args_schema, 'model_json_schema'): + schema = tool.args_schema.model_json_schema() + else: + schema = tool.args_schema.schema() properties = schema.get('properties', {}) required 
= schema.get('required', []) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/__init__.py new file mode 100644 index 00000000..33b9c1d3 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/__init__.py @@ -0,0 +1,4 @@ +"""CugaLite Tool Providers subpackage. + +Contains tool providers for registry, custom, langchain, and composite tools. +""" diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_provider_interface.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/base.py similarity index 78% rename from src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_provider_interface.py rename to src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/base.py index 94a6806a..cf45b693 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_provider_interface.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/base.py @@ -1,13 +1,14 @@ -""" -Tool Provider Interface +"""Tool Provider Interface Defines the interface for providing tools to CugaAgent from different sources. """ +from __future__ import annotations + from abc import ABC, abstractmethod from typing import List, Optional -from pydantic import BaseModel from langchain_core.tools import StructuredTool +from pydantic import BaseModel class AppDefinition(BaseModel): @@ -20,8 +21,7 @@ class AppDefinition(BaseModel): class ToolProviderInterface(ABC): - """ - Abstract interface for tool providers. + """Abstract interface for tool providers. Implementations provide tools from different sources: - ToolRegistryProvider: Tools from the MCP registry (separate process) @@ -30,8 +30,7 @@ class ToolProviderInterface(ABC): @abstractmethod async def get_apps(self) -> List[AppDefinition]: - """ - Get list of available applications/services. + """Get list of available applications/services. 
Returns: List of AppDefinition objects with app metadata @@ -40,8 +39,7 @@ async def get_apps(self) -> List[AppDefinition]: @abstractmethod async def get_tools(self, app_name: str) -> List[StructuredTool]: - """ - Get tools for a specific application. + """Get tools for a specific application. Args: app_name: Name of the application @@ -53,8 +51,7 @@ async def get_tools(self, app_name: str) -> List[StructuredTool]: @abstractmethod async def get_all_tools(self) -> List[StructuredTool]: - """ - Get all available tools from all applications. + """Get all available tools from all applications. Returns: List of all LangChain StructuredTool objects @@ -63,7 +60,5 @@ async def get_all_tools(self) -> List[StructuredTool]: @abstractmethod async def initialize(self): - """ - Initialize the tool provider (e.g., connect to registry, validate tools). - """ + """Initialize the tool provider (e.g., connect to registry, validate tools).""" pass diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/combined_tool_provider.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/combined.py similarity index 84% rename from src/cuga/backend/cuga_graph/nodes/cuga_lite/combined_tool_provider.py rename to src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/combined.py index c9356551..bdaa6bc9 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/combined_tool_provider.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/combined.py @@ -1,27 +1,27 @@ -""" -Combined Tool Provider +"""Combined Tool Provider Provides tools from both runtime tracker tools and registry. First checks tracker for runtime tools, then falls back to registry. 
""" -from typing import List, Dict, Optional, Any, Callable, Tuple -import aiohttp -import asyncio +from __future__ import annotations -from loguru import logger +import asyncio +from typing import Any, Callable, Dict, List, Optional, Tuple +import aiohttp from langchain_core.tools import StructuredTool +from loguru import logger from cuga.backend.activity_tracker.tracker import ActivityTracker -from cuga.backend.tools_env.registry.utils.api_utils import get_apps, get_registry_base_url, get_agent_id -from cuga.backend.tools_env.registry.utils.types import AppDefinition -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_args import merge_tool_call_args -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ( +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ( + AppDefinition, ToolProviderInterface, ) -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_registry_provider import ( +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.registry import ( create_tool_from_api_dict, ) +from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.arguments import merge_tool_call_args +from cuga.backend.tools_env.registry.utils.api_utils import get_agent_id, get_apps, get_registry_base_url from cuga.config import settings @@ -36,11 +36,11 @@ def create_tool_from_tracker(tool_name: str, tool_def: Dict[str, Any], app_name: Returns: StructuredTool instance that calls tracker.invoke_tool """ - from pydantic import create_model, Field + from pydantic import Field, create_model - description = tool_def.get('description', '') - parameters = tool_def.get('parameters', {}) - operation_id = tool_def.get('operation_id') # Original OpenAPI operationId if available + description = tool_def.get("description", "") + parameters = tool_def.get("parameters", {}) + operation_id = tool_def.get("operation_id") # Original OpenAPI operationId if available # Convert OpenAPI parameter format to JSON schema format if needed if isinstance(parameters, list): @@ 
-49,56 +49,56 @@ def _convert_openapi_params_to_json_schema(params): properties = {} required = [] for param in params: - name = param.get('name', '') - param_type = param.get('schema', {}).get('type', 'string') - param_desc = param.get('description', '') - properties[name] = {'type': param_type, 'description': param_desc} + name = param.get("name", "") + param_type = param.get("schema", {}).get("type", "string") + param_desc = param.get("description", "") + properties[name] = {"type": param_type, "description": param_desc} # Handle constraints - constraints = param.get('constraints', []) + constraints = param.get("constraints", []) if constraints: - properties[name]['constraints'] = constraints + properties[name]["constraints"] = constraints - if param.get('required', False): + if param.get("required", False): required.append(name) - return {'properties': properties, 'required': required} + return {"properties": properties, "required": required} parameters = _convert_openapi_params_to_json_schema(parameters) field_definitions = {} param_constraints = {} if isinstance(parameters, dict): - if 'properties' in parameters: - props = parameters['properties'] - required = parameters.get('required', []) + if "properties" in parameters: + props = parameters["properties"] + required = parameters.get("required", []) for param_name, param_schema in props.items(): - param_type = param_schema.get('type', 'string') - param_desc = param_schema.get('description', '') + param_type = param_schema.get("type", "string") + param_desc = param_schema.get("description", "") # Handle type that might be a list (e.g., ['string', 'null']) if isinstance(param_type, list): # Take the first non-null type, or default to 'string' - param_type = next((t for t in param_type if t != 'null'), 'string') + param_type = next((t for t in param_type if t != "null"), "string") type_mapping = { - 'string': str, - 'integer': int, - 'number': float, - 'boolean': bool, - 'array': list, - 'object': dict, + 
"string": str, + "integer": int, + "number": float, + "boolean": bool, + "array": list, + "object": dict, } python_type = type_mapping.get(param_type, str) # Store constraints for later use in prompt - constraints = param_schema.get('constraints', []) + constraints = param_schema.get("constraints", []) if constraints: param_constraints[param_name] = constraints if param_name in required: field_definitions[param_name] = (python_type, Field(..., description=param_desc)) else: - default_val = param_schema.get('default', None) + default_val = param_schema.get("default", None) # Make sure default values are hashable if needed if isinstance(default_val, list): default_val = None # Skip unhashable defaults @@ -117,7 +117,7 @@ def _convert_openapi_params_to_json_schema(params): async def tool_func(*args, **kwargs): import time - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import ToolCallTracker + from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import ToolCallTracker start_time = time.time() result = None @@ -128,7 +128,7 @@ async def tool_func(*args, **kwargs): all_kwargs = merge_tool_call_args(args, kwargs, param_names) # Use tracker.invoke_tool with timeout - timeout_seconds = getattr(settings.advanced_features, 'tool_call_timeout', 30) + timeout_seconds = getattr(settings.advanced_features, "tool_call_timeout", 30) try: result = await asyncio.wait_for( tracker.invoke_tool(app_name, tool_name, all_kwargs), timeout=timeout_seconds @@ -148,7 +148,7 @@ async def tool_func(*args, **kwargs): duration_ms = (time.time() - start_time) * 1000 ToolCallTracker.record_call( tool_name=tool_name, - arguments=all_kwargs if 'all_kwargs' in dir() else {}, + arguments=all_kwargs if "all_kwargs" in dir() else {}, result=result, app_name=app_name, operation_id=_operation_id, @@ -165,7 +165,7 @@ async def tool_func(*args, **kwargs): tool.func = tool_func - if not hasattr(tool.func, '_param_constraints'): + if not hasattr(tool.func, "_param_constraints"): 
tool.func._param_constraints = param_constraints # Store metadata for tool call tracking @@ -179,8 +179,7 @@ async def tool_func(*args, **kwargs): class CombinedToolProvider(ToolProviderInterface): - """ - Tool provider that combines runtime tools from tracker and registry tools. + """Tool provider that combines runtime tools from tracker and registry tools. First checks tracker for runtime tools, then tries registry with try/catch. """ @@ -191,8 +190,7 @@ def __init__( get_include_by_app: Optional[Callable[[], Tuple[Optional[Dict[str, List[str]]], int]]] = None, agent_id: Optional[str] = None, ): - """ - Initialize the combined tool provider. + """Initialize the combined tool provider. Args: app_names: Optional list of specific app names to load. If None, loads all. @@ -214,7 +212,7 @@ async def initialize(self): logger.info(f"Initializing CombinedToolProvider (agent_id={self.agent_id})...") tracker_apps = [] - if hasattr(tracker, 'apps') and tracker.apps: + if hasattr(tracker, "apps") and tracker.apps: tracker_apps = tracker.apps registry_apps = [] @@ -239,8 +237,8 @@ async def initialize(self): AppDefinition( name=app.name, url=app.url, - description=getattr(app, 'description', None), - type=getattr(app, 'type', 'api'), + description=getattr(app, "description", None), + type=getattr(app, "type", "api"), ) for app in filtered_apps ] @@ -249,8 +247,8 @@ async def initialize(self): AppDefinition( name=app.name, url=app.url, - description=getattr(app, 'description', None), - type=getattr(app, 'type', 'api'), + description=getattr(app, "description", None), + type=getattr(app, "type", "api"), ) for app in all_apps ] @@ -280,7 +278,7 @@ async def get_apps(self) -> List[AppDefinition]: # Fetch fresh apps list (cheap — a few items) try: fresh: List[AppDefinition] = [] - if hasattr(tracker, 'apps') and tracker.apps: + if hasattr(tracker, "apps") and tracker.apps: fresh.extend(tracker.apps) if settings.advanced_features.registry: fresh.extend(await 
get_apps(agent_id=self.agent_id)) @@ -293,7 +291,7 @@ async def get_apps(self) -> List[AppDefinition]: AppDefinition( name=a.name, url=a.url, - description=getattr(a, 'description', None), + description=getattr(a, "description", None), ) for a in fresh ] @@ -319,8 +317,7 @@ def _filter_tools_by_include( return out async def get_tools(self, app_name: str) -> List[StructuredTool]: - """ - Get tools for a specific application. + """Get tools for a specific application. First checks tracker for runtime tools, then tries registry. If get_include_by_app is set, filters to only tools in the include list for this app. @@ -381,14 +378,14 @@ async def get_tools(self, app_name: str) -> List[StructuredTool]: try: logger.debug(f"Getting tools from registry for: {app_name} (agent_id={self.agent_id})") registry_base = get_registry_base_url() - url = f'{registry_base}/applications/{app_name}/apis?include_response_schema=true' + url = f"{registry_base}/applications/{app_name}/apis?include_response_schema=true" # Add agent_id parameter if available agent_id = self.agent_id or get_agent_id() if agent_id: - url += f'&agent_id={agent_id}' + url += f"&agent_id={agent_id}" - headers = {'accept': 'application/json'} + headers = {"accept": "application/json"} async with aiohttp.ClientSession() as session: async with session.get(url, headers=headers) as response: diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/direct_langchain_tools_provider.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/langchain.py similarity index 82% rename from src/cuga/backend/cuga_graph/nodes/cuga_lite/direct_langchain_tools_provider.py rename to src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/langchain.py index 7acdf894..eda02826 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/direct_langchain_tools_provider.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/langchain.py @@ -1,22 +1,22 @@ -""" -Direct LangChain Tools Provider +"""Direct LangChain Tools Provider 
Provides LangChain tools that are passed directly at runtime (in-process). """ +from __future__ import annotations + from typing import List, Optional +from langchain_core.tools import BaseTool, StructuredTool from loguru import logger -from langchain_core.tools import StructuredTool, BaseTool -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ( - ToolProviderInterface, +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ( AppDefinition, + ToolProviderInterface, ) class DirectLangChainToolsProvider(ToolProviderInterface): - """ - Tool provider for direct LangChain tools (in-process). + """Tool provider for direct LangChain tools (in-process). This provider accepts LangChain tools directly at initialization time. Useful when CUGA is embedded as a component in another system. @@ -36,8 +36,7 @@ def my_tool(query: str) -> str: """ def __init__(self, tools: Optional[List[BaseTool]] = None, app_name: str = "runtime_tools"): - """ - Initialize the direct tools provider. + """Initialize the direct tools provider. Args: tools: List of LangChain BaseTool or StructuredTool instances @@ -58,18 +57,18 @@ def _validate_tools(self): f"Got {type(tool).__name__}, expected BaseTool or StructuredTool." ) - if not hasattr(tool, 'name') or not tool.name: + if not hasattr(tool, "name") or not tool.name: raise ValueError(f"Tool at index {i} is missing a name") - if isinstance(tool, StructuredTool) and not hasattr(tool, 'func'): - if not hasattr(tool, 'coroutine') and not hasattr(tool, '_run'): + if isinstance(tool, StructuredTool) and not hasattr(tool, "func"): + if not hasattr(tool, "coroutine") and not hasattr(tool, "_run"): logger.warning( f"StructuredTool '{tool.name}' is missing .func attribute. " f"Adding it for CodeAct compatibility." 
) - if hasattr(tool, 'coroutine') and tool.coroutine: + if hasattr(tool, "coroutine") and tool.coroutine: tool.func = tool.coroutine - elif hasattr(tool, '_run'): + elif hasattr(tool, "_run"): tool.func = tool._run async def initialize(self): @@ -87,8 +86,7 @@ async def initialize(self): self.initialized = True async def get_apps(self) -> List[AppDefinition]: - """ - Get list of applications (single virtual app for runtime tools). + """Get list of applications (single virtual app for runtime tools). Returns: List with one AppDefinition representing the runtime tools @@ -106,8 +104,7 @@ async def get_apps(self) -> List[AppDefinition]: ] async def get_tools(self, app_name: str) -> List[StructuredTool]: - """ - Get tools for the specified app. + """Get tools for the specified app. Args: app_name: Name of the application (should match self.app_name) @@ -125,8 +122,7 @@ async def get_tools(self, app_name: str) -> List[StructuredTool]: return self.tools async def get_all_tools(self) -> List[StructuredTool]: - """ - Get all available tools. + """Get all available tools. Returns: List of all LangChain tools @@ -137,8 +133,7 @@ async def get_all_tools(self) -> List[StructuredTool]: return self.tools def add_tool(self, tool: BaseTool): - """ - Add a tool dynamically after initialization. + """Add a tool dynamically after initialization. Args: tool: LangChain BaseTool or StructuredTool instance @@ -150,8 +145,7 @@ def add_tool(self, tool: BaseTool): logger.info(f"Added tool '{tool.name}' to DirectLangChainToolsProvider") def add_tools(self, tools: List[BaseTool]): - """ - Add multiple tools dynamically after initialization. + """Add multiple tools dynamically after initialization. 
Args: tools: List of LangChain BaseTool or StructuredTool instances diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_registry_provider.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/registry.py similarity index 82% rename from src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_registry_provider.py rename to src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/registry.py index 7526b28c..98f9b973 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_registry_provider.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/registry.py @@ -1,23 +1,24 @@ -""" -Tool Registry Provider +"""Tool Registry Provider Provides tools from the MCP registry (separate process). """ -import aiohttp -import json +from __future__ import annotations + import asyncio -from typing import List, Dict, Any, Optional -from loguru import logger -from pydantic import create_model, Field +import json +from typing import Any, Dict, List, Optional +import aiohttp from langchain_core.tools import StructuredTool +from loguru import logger +from pydantic import Field, create_model -from cuga.backend.tools_env.registry.utils.api_utils import get_apis, get_apps, get_registry_base_url -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_args import merge_tool_call_args -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ( - ToolProviderInterface, +from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.arguments import merge_tool_call_args +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ( AppDefinition, + ToolProviderInterface, ) +from cuga.backend.tools_env.registry.utils.api_utils import get_apis, get_apps, get_registry_base_url from cuga.config import settings @@ -41,21 +42,21 @@ async def call_api( The API response """ import time - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import ToolCallTracker + from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import ToolCallTracker if args is 
None: args = {} registry_base = get_registry_base_url() - registry_host = f'{registry_base}/functions/call' + registry_host = f"{registry_base}/functions/call" # Add agent_id query parameter if provided if agent_id: - registry_host += f'?agent_id={agent_id}' + registry_host += f"?agent_id={agent_id}" payload = {"function_name": api_name, "app_name": app_name, "args": args} - timeout_seconds = getattr(settings.advanced_features, 'tool_call_timeout', 30) + timeout_seconds = getattr(settings.advanced_features, "tool_call_timeout", 30) start_time = time.time() result = None error_msg = None @@ -115,30 +116,30 @@ def _convert_openapi_params_to_json_schema(parameters: List[Dict[str, Any]]) -> required = [] for param in parameters: - param_name = param.get('name', '') + param_name = param.get("name", "") if not param_name: continue properties[param_name] = { - 'type': param.get('type', 'string'), - 'description': param.get('description', ''), + "type": param.get("type", "string"), + "description": param.get("description", ""), } # Handle default values - default_val = param.get('default') + default_val = param.get("default") if default_val is not None: - properties[param_name]['default'] = default_val + properties[param_name]["default"] = default_val # Handle constraints - constraints = param.get('constraints', []) + constraints = param.get("constraints", []) if constraints: - properties[param_name]['constraints'] = constraints + properties[param_name]["constraints"] = constraints # Handle required - if param.get('required', False): + if param.get("required", False): required.append(param_name) - return {'properties': properties, 'required': required} + return {"properties": properties, "required": required} def create_tool_from_api_dict( @@ -155,10 +156,10 @@ def create_tool_from_api_dict( Returns: StructuredTool instance with .func attribute """ - description = tool_def.get('description', '') - parameters = tool_def.get('parameters', {}) - response_schemas = 
tool_def.get('response_schemas', {}) - operation_id = tool_def.get('operation_id') # Original OpenAPI operationId + description = tool_def.get("description", "") + parameters = tool_def.get("parameters", {}) + response_schemas = tool_def.get("response_schemas", {}) + operation_id = tool_def.get("operation_id") # Original OpenAPI operationId # Convert OpenAPI parameter format to JSON schema format if needed if isinstance(parameters, list): @@ -167,37 +168,37 @@ def create_tool_from_api_dict( field_definitions = {} param_constraints = {} if isinstance(parameters, dict): - if 'properties' in parameters: - props = parameters['properties'] - required = parameters.get('required', []) + if "properties" in parameters: + props = parameters["properties"] + required = parameters.get("required", []) for param_name, param_schema in props.items(): - param_type = param_schema.get('type', 'string') - param_desc = param_schema.get('description', '') + param_type = param_schema.get("type", "string") + param_desc = param_schema.get("description", "") # Handle type that might be a list (e.g., ['string', 'null']) if isinstance(param_type, list): # Take the first non-null type, or default to 'string' - param_type = next((t for t in param_type if t != 'null'), 'string') + param_type = next((t for t in param_type if t != "null"), "string") type_mapping = { - 'string': str, - 'integer': int, - 'number': float, - 'boolean': bool, - 'array': list, - 'object': dict, + "string": str, + "integer": int, + "number": float, + "boolean": bool, + "array": list, + "object": dict, } python_type = type_mapping.get(param_type, str) # Store constraints for later use in prompt - constraints = param_schema.get('constraints', []) + constraints = param_schema.get("constraints", []) if constraints: param_constraints[param_name] = constraints if param_name in required: field_definitions[param_name] = (python_type, Field(..., description=param_desc)) else: - default_val = param_schema.get('default', None) + 
default_val = param_schema.get("default", None) # Make sure default values are hashable if needed if isinstance(default_val, list): default_val = None # Skip unhashable defaults @@ -241,10 +242,10 @@ async def tool_func(*args, **kwargs): tool.func = tool_func - if not hasattr(tool.func, '_response_schemas'): + if not hasattr(tool.func, "_response_schemas"): tool.func._response_schemas = response_schemas - if not hasattr(tool.func, '_param_constraints'): + if not hasattr(tool.func, "_param_constraints"): tool.func._param_constraints = param_constraints # Store metadata for tool call tracking @@ -255,16 +256,14 @@ async def tool_func(*args, **kwargs): class ToolRegistryProvider(ToolProviderInterface): - """ - Tool provider that loads tools from the MCP registry. + """Tool provider that loads tools from the MCP registry. This provider connects to the registry server to get apps and tools. Tools are loaded from OpenAPI specs, MCP servers, or TRM services. """ def __init__(self, app_names: Optional[List[str]] = None, agent_id: Optional[str] = None): - """ - Initialize the registry provider. + """Initialize the registry provider. Args: app_names: Optional list of specific app names to load. If None, loads all. 
@@ -290,14 +289,14 @@ async def initialize(self): raise Exception(f"None of the requested apps found: {self.app_names}") self.apps = [ AppDefinition( - name=app.name, url=app.url, description=app.description, type=getattr(app, 'type', 'api') + name=app.name, url=app.url, description=app.description, type=getattr(app, "type", "api") ) for app in filtered_apps ] else: self.apps = [ AppDefinition( - name=app.name, url=app.url, description=app.description, type=getattr(app, 'type', 'api') + name=app.name, url=app.url, description=app.description, type=getattr(app, "type", "api") ) for app in all_apps ] @@ -312,8 +311,7 @@ async def get_apps(self) -> List[AppDefinition]: return self.apps async def get_tools(self, app_name: str) -> List[StructuredTool]: - """ - Get tools for a specific application. + """Get tools for a specific application. Args: app_name: Name of the application diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_tool_call_args.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_arguments.py similarity index 91% rename from src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_tool_call_args.py rename to src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_arguments.py index 4f28135b..170c6d93 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_tool_call_args.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_arguments.py @@ -1,4 +1,4 @@ -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_args import merge_tool_call_args +from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.arguments import merge_tool_call_args def test_single_dict_unpacks_to_named_params(): diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_cuga_lite_graph_evolve_guidelines.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_cuga_lite_graph_evolve_guidelines.py index 3047c1e8..122e489b 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_cuga_lite_graph_evolve_guidelines.py +++ 
b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_cuga_lite_graph_evolve_guidelines.py @@ -7,7 +7,7 @@ CugaLiteState, create_cuga_lite_graph, ) -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ( +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ( AppDefinition, ToolProviderInterface, ) @@ -56,7 +56,7 @@ async def test_cuga_lite_evolve_guidelines_are_injected_independently_of_legacy_ False, ), patch( - "cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes.apply_context_summarization", + "cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_nodes.apply_context_summarization", new=AsyncMock(side_effect=lambda messages, *args, **kwargs: messages), ), patch( diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py index e40ae630..5f334a1b 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py @@ -12,7 +12,7 @@ from langgraph.types import Command from langgraph.graph import END -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( CoreGraphAdapter, append_chat_messages_with_step_limit as _core_append_with_step_limit, create_error_command as _core_create_error_command, @@ -42,7 +42,7 @@ def is_returning_from_approval(adapter: CoreGraphAdapter, state: Any) -> bool: @staticmethod def extract_approved_code(adapter: CoreGraphAdapter, state: Any) -> Optional[str]: """Extract the approved code from the last AI message.""" - from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import ( + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import ( extract_and_combine_codeblocks, ) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tracking/__init__.py 
b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tracking/__init__.py new file mode 100644 index 00000000..ddf16f5d --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tracking/__init__.py @@ -0,0 +1,4 @@ +"""CugaLite Tool Call Tracking subpackage. + +Contains observability and tool call argument normalization utilities. +""" diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_call_args.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tracking/arguments.py similarity index 100% rename from src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_call_args.py rename to src/cuga/backend/cuga_graph/nodes/cuga_lite/tracking/arguments.py diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_call_tracker.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tracking/tracker.py similarity index 93% rename from src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_call_tracker.py rename to src/cuga/backend/cuga_graph/nodes/cuga_lite/tracking/tracker.py index e2548fa5..4e972b7c 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_call_tracker.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tracking/tracker.py @@ -1,35 +1,36 @@ -""" -Tool Call Tracker +"""Tool Call Tracker Tracks tool/API calls during agent execution for observability. Uses contextvars for thread-safe tracking across async execution. 
For custom tool providers, use the `tracked_tool` decorator: - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import tracked_tool + from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import tracked_tool - @tracked_tool(operation_id="getUsers", app_name="my_api") + @tracked_tool(app_name="my_api") async def get_users(limit: int = 10) -> list: return await fetch_users(limit) """ +from __future__ import annotations + +import asyncio import contextvars import functools import time from datetime import datetime -from typing import List, Dict, Any, Optional, Callable, TypeVar +from typing import Any, Callable, Dict, List, Optional, TypeVar from loguru import logger - _tool_calls_context: contextvars.ContextVar[List[Dict[str, Any]]] = contextvars.ContextVar( - 'tool_calls', default=None + "tool_calls", default=None ) _tracking_enabled_context: contextvars.ContextVar[bool] = contextvars.ContextVar( - 'tracking_enabled', default=False + "tracking_enabled", default=False ) -F = TypeVar('F', bound=Callable[..., Any]) +F = TypeVar("F", bound=Callable[..., Any]) class ToolCallTracker: @@ -118,8 +119,7 @@ def tracked_tool( *, app_name: Optional[str] = None, ) -> Callable[[F], F]: - """ - Decorator to automatically track tool calls in custom tool providers. + """Decorator to automatically track tool calls in custom tool providers. Use this decorator on tool functions to enable tracking when `track_tool_calls=True` is passed to `agent.invoke()`. 
@@ -139,7 +139,7 @@ def multiply(a: int, b: int) -> int: # With app_name for grouping @tracked_tool(app_name="calculator") def add(a: int, b: int) -> int: - return a + b + return a + b # Works with async functions too @tracked_tool(app_name="user_service") @@ -215,8 +215,6 @@ def sync_wrapper(*args, **kwargs): error=error_msg, ) - import asyncio - if asyncio.iscoroutinefunction(func): return async_wrapper # type: ignore return sync_wrapper # type: ignore diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py index 914e308b..bc166ae9 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_graph.py @@ -20,19 +20,19 @@ ) from cuga.sdk import CugaAgent from cuga.config import settings -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( CoreGraphAdapter, append_chat_messages_with_step_limit as _core_append_with_step_limit, create_error_command as _core_create_error_command, ) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_nodes import ( +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_nodes import ( create_call_model_node as _create_shared_call_model_node, ) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.shared_graph import build_agent_graph +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.shared_graph import build_agent_graph from cuga.backend.cuga_graph.nodes.cuga_supervisor.supervisor_graph_adapter import ( SupervisorGraphAdapter, ) -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface class _CugaSupervisorLoopAdapter(CoreGraphAdapter): diff --git
a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_state.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_state.py index 3be664fd..58e61d43 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_state.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/cuga_supervisor_state.py @@ -3,7 +3,7 @@ """ from typing import Any, Dict, List, Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from langchain_core.messages import BaseMessage from cuga.backend.cuga_graph.state.agent_state import AgentState @@ -54,8 +54,7 @@ class CugaSupervisorState(AgentState): cuga_lite_max_steps: Optional[int] = None - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) @property def supervisor_variables_manager(self): diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py index aefe4a54..2207756a 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py @@ -26,7 +26,7 @@ from langchain_core.messages import BaseMessage, HumanMessage from loguru import logger -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import CoreGraphAdapter from cuga.config import settings @@ -118,12 +118,12 @@ def build_prepare_node(self) -> Callable: CugaSupervisorState, ) from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler - from cuga.backend.cuga_graph.nodes.cuga_agent_core.runtime_tools import ( + from cuga.backend.cuga_graph.nodes.cuga_agent_core.tools.runtime_tools import ( build_runtime_tools, prompt_tool_dicts, resolve_runtime_backends, ) - from 
cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ( + from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ( ExecutionRouter, split_execution_note, ) @@ -170,7 +170,7 @@ async def delegate_to_agent(task: str, variables: Optional[List[str]] = None) -> and result.variables and adapter._shared_vm_ref[0] is not None ): - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import ( + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import ( VariableBridge, ) @@ -338,8 +338,10 @@ async def prepare_agents_and_prompt( } agent_tools_for_prompt.append(tool_info) - from cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter import create_update_todos_tool - from cuga.backend.cuga_graph.nodes.cuga_agent_core.code_extraction import make_tool_awaitable + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.todos import create_update_todos_tool + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import ( + make_tool_awaitable, + ) todos_tool = await create_update_todos_tool() adapter._agent_tools_context["create_update_todos"] = make_tool_awaitable(todos_tool.func) @@ -445,12 +447,12 @@ def build_execute_node(self) -> Callable: ) from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor - from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph_nodes import ( + from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( append_chat_messages_with_step_limit as _core_append, create_error_command as _core_create_error, execution_output_text, ) - from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution_policy import ExecutionRouter + from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ExecutionRouter from langchain_core.runnables import RunnableConfig adapter = self diff --git 
a/src/cuga/backend/cuga_graph/policy/tests/helpers.py b/src/cuga/backend/cuga_graph/policy/tests/helpers.py index c9b874a4..e64122bb 100644 --- a/src/cuga/backend/cuga_graph/policy/tests/helpers.py +++ b/src/cuga/backend/cuga_graph/policy/tests/helpers.py @@ -16,7 +16,7 @@ from cuga.backend.cuga_graph.nodes.human_in_the_loop.followup_model import ActionResponse from cuga.backend.llm.models import LLMManager from cuga.config import settings -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface async def setup_policy_storage( diff --git a/src/cuga/backend/cuga_graph/policy/tests/test_e2e_healthcare_family_claims.py b/src/cuga/backend/cuga_graph/policy/tests/test_e2e_healthcare_family_claims.py index 5741c789..aeedc464 100644 --- a/src/cuga/backend/cuga_graph/policy/tests/test_e2e_healthcare_family_claims.py +++ b/src/cuga/backend/cuga_graph/policy/tests/test_e2e_healthcare_family_claims.py @@ -7,7 +7,7 @@ from langchain_core.tools import StructuredTool from cuga.backend.cuga_graph.policy.models import Playbook, NaturalLanguageTrigger -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface from .helpers import ( setup_policy_storage, diff --git a/src/cuga/backend/cuga_graph/policy/tests/test_e2e_output_formatter.py b/src/cuga/backend/cuga_graph/policy/tests/test_e2e_output_formatter.py index 04499e5d..1cf7ac28 100644 --- a/src/cuga/backend/cuga_graph/policy/tests/test_e2e_output_formatter.py +++ b/src/cuga/backend/cuga_graph/policy/tests/test_e2e_output_formatter.py @@ -12,7 +12,7 @@ NaturalLanguageTrigger, ) -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface from 
.helpers import ( setup_policy_storage, diff --git a/src/cuga/backend/cuga_graph/policy/tests/test_e2e_playbook_guidance.py b/src/cuga/backend/cuga_graph/policy/tests/test_e2e_playbook_guidance.py index a0b61f08..ab2e80ca 100644 --- a/src/cuga/backend/cuga_graph/policy/tests/test_e2e_playbook_guidance.py +++ b/src/cuga/backend/cuga_graph/policy/tests/test_e2e_playbook_guidance.py @@ -3,7 +3,7 @@ import pytest from cuga.backend.cuga_graph.policy.models import Playbook, PlaybookStep, KeywordTrigger -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface from .helpers import ( setup_policy_storage, diff --git a/src/cuga/backend/cuga_graph/policy/tests/test_e2e_playbook_refinement.py b/src/cuga/backend/cuga_graph/policy/tests/test_e2e_playbook_refinement.py index 38818632..b26d123b 100644 --- a/src/cuga/backend/cuga_graph/policy/tests/test_e2e_playbook_refinement.py +++ b/src/cuga/backend/cuga_graph/policy/tests/test_e2e_playbook_refinement.py @@ -6,7 +6,7 @@ Playbook, NaturalLanguageTrigger, ) -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface from .helpers import ( setup_policy_storage, diff --git a/src/cuga/backend/cuga_graph/policy/tests/test_e2e_tool_enrichment.py b/src/cuga/backend/cuga_graph/policy/tests/test_e2e_tool_enrichment.py index 9db7b6d3..d607b298 100644 --- a/src/cuga/backend/cuga_graph/policy/tests/test_e2e_tool_enrichment.py +++ b/src/cuga/backend/cuga_graph/policy/tests/test_e2e_tool_enrichment.py @@ -15,7 +15,7 @@ ) from cuga.backend.cuga_graph.policy.models import KeywordTrigger, ToolGuide -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface from 
langchain_core.tools import StructuredTool from pydantic import BaseModel diff --git a/src/cuga/backend/cuga_graph/policy/tests/test_tool_approval_full_graph.py b/src/cuga/backend/cuga_graph/policy/tests/test_tool_approval_full_graph.py index 60d78b72..3b9284dc 100644 --- a/src/cuga/backend/cuga_graph/policy/tests/test_tool_approval_full_graph.py +++ b/src/cuga/backend/cuga_graph/policy/tests/test_tool_approval_full_graph.py @@ -17,7 +17,7 @@ from cuga.backend.cuga_graph.state.agent_state import AgentState from cuga.backend.cuga_graph.nodes.human_in_the_loop.followup_model import ActionResponse, ActionType -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ( +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ( ToolProviderInterface, AppDefinition, ) diff --git a/src/cuga/backend/server/main.py b/src/cuga/backend/server/main.py index c5fdaf27..c802af3f 100644 --- a/src/cuga/backend/server/main.py +++ b/src/cuga/backend/server/main.py @@ -676,7 +676,7 @@ async def _knowledge_warmup_then_maybe_oobe_pdf(): if settings.advanced_features.langfuse_tracing and CallbackHandler is not None else None ) - from cuga.backend.cuga_graph.nodes.cuga_lite.combined_tool_provider import CombinedToolProvider + from cuga.backend.cuga_graph.nodes.cuga_lite.providers.combined import CombinedToolProvider from cuga.backend.server.config_store import load_config, load_draft # Load the latest published config so both agents start with the correct LLM. 
diff --git a/src/cuga/sdk.py b/src/cuga/sdk.py index 399137e2..a9855fc0 100644 --- a/src/cuga/sdk.py +++ b/src/cuga/sdk.py @@ -86,10 +86,10 @@ def delete_database(table: str) -> str: from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import ( create_cuga_lite_graph, ) -from cuga.backend.cuga_graph.nodes.cuga_lite.direct_langchain_tools_provider import ( +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.langchain import ( DirectLangChainToolsProvider, ) -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface from cuga.backend.cuga_graph.policy.configurable import PolicyConfigurable from cuga.backend.cuga_graph.nodes.answer.final_answer_agent.prompts.load_prompt import ( FinalAnswerAppworldOutput, @@ -1807,7 +1807,7 @@ async def invoke( # Get tool calls from result (only if tracking was enabled) tool_calls = result.get("tool_calls", []) if track_tool_calls else [] - from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge _hitl_variables = VariableBridge.extract_values(result.get("variables_storage", {}) or {}) @@ -1948,7 +1948,7 @@ async def invoke( tool_calls = result.get("tool_calls", []) if track_tool_calls else [] # Extract sub-agent variables for VariableBridge (Phase 8). 
- from cuga.backend.cuga_graph.nodes.cuga_agent_core.variable_bridge import VariableBridge + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import VariableBridge _result_variables = VariableBridge.extract_values(result.get("variables_storage", {}) or {}) diff --git a/src/cuga/sdk_core/tests/test_sdk_integration.py b/src/cuga/sdk_core/tests/test_sdk_integration.py index 3ed8db75..2bde0dd1 100644 --- a/src/cuga/sdk_core/tests/test_sdk_integration.py +++ b/src/cuga/sdk_core/tests/test_sdk_integration.py @@ -9,7 +9,7 @@ from langchain_core.tools import tool from cuga import CugaAgent, run_agent -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ( +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ( ToolProviderInterface, AppDefinition, ) @@ -44,6 +44,8 @@ def get_user_count() -> int: class TestToolProvider(ToolProviderInterface): """Test tool provider for integration tests""" + __test__ = False + def __init__(self, tools): self.tools = tools self.initialized = False diff --git a/src/cuga/supervisor_utils/supervisor_config.py b/src/cuga/supervisor_utils/supervisor_config.py index a55f26d1..e122d1bc 100644 --- a/src/cuga/supervisor_utils/supervisor_config.py +++ b/src/cuga/supervisor_utils/supervisor_config.py @@ -10,8 +10,8 @@ if TYPE_CHECKING: pass -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface -from cuga.backend.cuga_graph.nodes.cuga_lite.combined_tool_provider import CombinedToolProvider +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.combined import CombinedToolProvider class SupervisorConfig(BaseModel): diff --git a/tests/integration/test_llm_config_publish.py b/tests/integration/test_llm_config_publish.py index 7cad0477..c5c9d216 100644 --- a/tests/integration/test_llm_config_publish.py +++ b/tests/integration/test_llm_config_publish.py @@ 
-135,7 +135,7 @@ async def test_build_graph_uses_create_llm_from_config(self): """When llm_config is set, build_graph calls create_llm_from_config with it.""" from unittest.mock import AsyncMock from cuga.backend.cuga_graph.graph import DynamicAgentGraph - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_provider_interface import ToolProviderInterface + from cuga.backend.cuga_graph.nodes.cuga_lite.providers.base import ToolProviderInterface mock_tp = MagicMock(spec=ToolProviderInterface) mock_tp.initialize = AsyncMock() diff --git a/tests/integration/test_tool_call_tracking.py b/tests/integration/test_tool_call_tracking.py index 646adc32..54bb44d8 100644 --- a/tests/integration/test_tool_call_tracking.py +++ b/tests/integration/test_tool_call_tracking.py @@ -16,8 +16,8 @@ import pytest from cuga.sdk import CugaAgent -from cuga.backend.cuga_graph.nodes.cuga_lite.combined_tool_provider import CombinedToolProvider -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import ToolCallTracker +from cuga.backend.cuga_graph.nodes.cuga_lite.providers.combined import CombinedToolProvider +from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import ToolCallTracker @pytest.mark.e2e @@ -261,7 +261,7 @@ class TestTrackedToolDecorator: def test_tracked_tool_decorator_simple(self): """Test @tracked_tool decorator without any arguments.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import tracked_tool + from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import tracked_tool @tracked_tool def add(a: int, b: int) -> int: @@ -282,7 +282,7 @@ def add(a: int, b: int) -> int: def test_tracked_tool_decorator_with_app_name(self): """Test @tracked_tool decorator with app_name.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import tracked_tool + from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import tracked_tool @tracked_tool(app_name="calculator") def multiply(a: int, b: int) -> int: @@ -301,7 +301,7 @@ 
def multiply(a: int, b: int) -> int: @pytest.mark.asyncio async def test_tracked_tool_decorator_async(self): """Test @tracked_tool decorator with async function.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import tracked_tool + from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import tracked_tool @tracked_tool(app_name="user_service") async def fetch_user(user_id: int) -> dict: @@ -320,7 +320,7 @@ async def fetch_user(user_id: int) -> dict: def test_tracked_tool_records_error(self): """Test that @tracked_tool records errors.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import tracked_tool + from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import tracked_tool @tracked_tool(app_name="failing_service") def failing_func() -> None: @@ -339,7 +339,7 @@ def failing_func() -> None: def test_tracked_tool_no_tracking_when_disabled(self): """Test that @tracked_tool doesn't record when tracking is disabled.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_call_tracker import tracked_tool + from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import tracked_tool @tracked_tool def my_func(x: int) -> int: @@ -375,7 +375,7 @@ class TestToolProviderOperationId: def test_create_tool_from_api_dict_stores_operation_id(self): """Test that create_tool_from_api_dict stores operation_id on tool.func.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_registry_provider import create_tool_from_api_dict + from cuga.backend.cuga_graph.nodes.cuga_lite.providers.registry import create_tool_from_api_dict tool_def = { "description": "Get all accounts", @@ -397,7 +397,7 @@ def test_create_tool_from_api_dict_stores_operation_id(self): def test_operation_id_not_in_model_dump(self): """Test that _operation_id is NOT serialized in tool.model_dump() - ensuring it won't leak into prompts.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_registry_provider import create_tool_from_api_dict + from 
cuga.backend.cuga_graph.nodes.cuga_lite.providers.registry import create_tool_from_api_dict tool_def = { "description": "Get all accounts", @@ -427,7 +427,7 @@ def test_operation_id_not_in_model_dump(self): def test_operation_id_not_in_prompt_serialization(self): """Test that operation_id does NOT appear in prompt-formatted tool output.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_registry_provider import create_tool_from_api_dict + from cuga.backend.cuga_graph.nodes.cuga_lite.providers.registry import create_tool_from_api_dict from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import PromptUtils tool_def = { @@ -458,7 +458,7 @@ def test_operation_id_not_in_prompt_serialization(self): def test_create_tool_from_api_dict_handles_missing_operation_id(self): """Test that create_tool_from_api_dict handles missing operation_id gracefully.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_registry_provider import create_tool_from_api_dict + from cuga.backend.cuga_graph.nodes.cuga_lite.providers.registry import create_tool_from_api_dict tool_def = { "description": "Get all accounts", @@ -477,7 +477,7 @@ def test_create_tool_from_api_dict_handles_missing_operation_id(self): def test_create_tool_from_tracker_stores_operation_id(self): """Test that create_tool_from_tracker stores operation_id on tool.func.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.combined_tool_provider import create_tool_from_tracker + from cuga.backend.cuga_graph.nodes.cuga_lite.providers.combined import create_tool_from_tracker tool_def = { "description": "Get all accounts", diff --git a/tests/system/test_manager_api_integration.py b/tests/system/test_manager_api_integration.py index 5b992f8e..a4c65ca3 100644 --- a/tests/system/test_manager_api_integration.py +++ b/tests/system/test_manager_api_integration.py @@ -33,7 +33,7 @@ MANAGE_API_URL = f"{MANAGER_BASE_URL}/api/manage" STREAM_API_URL = f"{MANAGER_BASE_URL}/stream" TEST_AGENT_ID = "cuga-default" -MANAGER_STARTUP_TIMEOUT 
= 60 # seconds +MANAGER_STARTUP_TIMEOUT = 120 # seconds MANAGER_HEALTH_CHECK_INTERVAL = 1 # seconds diff --git a/tests/unit/test_cuga_lite_bind_tools.py b/tests/unit/test_cuga_lite_bind_tools.py index a85c2183..45f2cbed 100644 --- a/tests/unit/test_cuga_lite_bind_tools.py +++ b/tests/unit/test_cuga_lite_bind_tools.py @@ -5,7 +5,7 @@ import pytest from langchain_core.tools import StructuredTool -from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import resolve_model_with_bind_tools +from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.bind_tools import resolve_model_with_bind_tools from cuga.backend.cuga_graph.nodes.cuga_lite.model_runtime_profile import resolve_bind_tools_fields diff --git a/tests/unit/test_cuga_lite_knowledge_scopes.py b/tests/unit/test_cuga_lite_knowledge_scopes.py index 88b27f83..62b45df0 100644 --- a/tests/unit/test_cuga_lite_knowledge_scopes.py +++ b/tests/unit/test_cuga_lite_knowledge_scopes.py @@ -2,7 +2,7 @@ from types import SimpleNamespace -from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import ( +from cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter import ( _get_knowledge_tool_scope_context, ) from cuga.backend.cuga_graph.nodes.cuga_lite.executors.e2b.e2b_executor import E2BExecutor diff --git a/tests/unit/test_find_tools_exception.py b/tests/unit/test_find_tools_exception.py index f2033be6..cdb0831d 100644 --- a/tests/unit/test_find_tools_exception.py +++ b/tests/unit/test_find_tools_exception.py @@ -26,7 +26,7 @@ def mock_apps(): async def _get_find_tools_func(mock_tools, mock_apps): """Create find_tools_tool and extract the inner async function for direct testing.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import ( + from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.find_tools import ( create_find_tools_tool, ) @@ -44,7 +44,7 @@ async def _get_find_tools_func(mock_tools, mock_apps): async def test_find_tools_func_returns_error_on_output_parser_exception(mock_tools, 
mock_apps): """When PromptUtils.find_tools raises OutputParserException, return an error string.""" with patch( - "cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph.PromptUtils.find_tools", + "cuga.backend.cuga_graph.nodes.cuga_lite.helpers.find_tools.PromptUtils.find_tools", new_callable=AsyncMock, side_effect=OutputParserException("Invalid json output: "), ): @@ -60,7 +60,7 @@ async def test_find_tools_func_returns_error_on_output_parser_exception(mock_too async def test_find_tools_func_returns_error_on_generic_exception(mock_tools, mock_apps): """Any exception type should be caught and return a generic internal error string with source error.""" with patch( - "cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph.PromptUtils.find_tools", + "cuga.backend.cuga_graph.nodes.cuga_lite.helpers.find_tools.PromptUtils.find_tools", new_callable=AsyncMock, side_effect=RuntimeError("unexpected LLM failure"), ): @@ -78,7 +78,7 @@ async def test_find_tools_func_success_passes_through(mock_tools, mock_apps): expected = "## 1. 
`test_tool`\nSome tool details" with patch( - "cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph.PromptUtils.find_tools", + "cuga.backend.cuga_graph.nodes.cuga_lite.helpers.find_tools.PromptUtils.find_tools", new_callable=AsyncMock, return_value=expected, ): @@ -91,11 +91,11 @@ async def test_find_tools_func_success_passes_through(mock_tools, mock_apps): @pytest.mark.asyncio async def test_find_tools_composes_query_with_initial_user_message(mock_tools, mock_apps): """When initial_user_message is set, shortlister query includes task context.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph import create_find_tools_tool + from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.find_tools import create_find_tools_tool app_to_tools_map = {"test_app": mock_tools} with patch( - "cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_graph.PromptUtils.find_tools", + "cuga.backend.cuga_graph.nodes.cuga_lite.helpers.find_tools.PromptUtils.find_tools", new_callable=AsyncMock, return_value="ok", ) as mock_find: @@ -111,5 +111,5 @@ async def test_find_tools_composes_query_with_initial_user_message(mock_tools, m mock_find.assert_awaited_once() call_kw = mock_find.await_args.kwargs assert call_kw["query"] == ( - "query: list calendar tools,\nTask context (initial user message): Book a flight to NYC" + "Query: list calendar tools\nTask context (initial user message): Book a flight to NYC" ) From ac3af57d67381f8fcc6dcc6bfc5d0f51a11842ad Mon Sep 17 00:00:00 2001 From: Sami Marreed Date: Sun, 24 May 2026 20:09:11 +0300 Subject: [PATCH 7/7] refactor: slim graph adapters and move shared policy to core Move ToolApprovalHandler into cuga_agent_core, extract supervisor prepare/execute nodes, and split the CugaLite adapter into focused modules. Address CodeRabbit findings for step-limit ordering, find_tools fallback, StructuredTool.func backfill, and Loguru formatting. 
--- .../cuga_agent_core/graph/graph_nodes.py | 2 +- .../cuga_agent_core/graph/shared_nodes.py | 18 +- .../policy}/tool_approval_handler.py | 59 +- .../policy/test_tool_approval_adapter.py | 2 +- .../nodes/cuga_lite/adapter/__init__.py | 5 + .../nodes/cuga_lite/adapter/graph_adapter.py | 172 +++ .../nodes/cuga_lite/adapter/prepare_node.py | 636 ++++++++++ .../nodes/cuga_lite/adapter/response_utils.py | 63 + .../nodes/cuga_lite/adapter/sandbox_node.py | 231 ++++ .../nodes/cuga_lite/agent_graph_adapter.py | 1103 +---------------- .../nodes/cuga_lite/cuga_lite_graph.py | 2 +- .../nodes/cuga_lite/helpers/bind_tools.py | 8 +- .../nodes/cuga_lite/helpers/find_tools.py | 2 + .../nodes/cuga_lite/providers/langchain.py | 17 +- .../tests/test_agent_graph_adapter.py | 4 +- .../nodes/cuga_supervisor/delegation.py | 113 ++ .../nodes/execute_agent_tool.py | 107 ++ .../nodes/prepare_agents_and_prompt.py | 249 ++++ .../supervisor_graph_adapter.py | 506 +------- tests/unit/test_cuga_lite_knowledge_scopes.py | 4 +- 20 files changed, 1634 insertions(+), 1669 deletions(-) rename src/cuga/backend/cuga_graph/nodes/{cuga_lite => cuga_agent_core/policy}/tool_approval_handler.py (83%) create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/__init__.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/graph_adapter.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/prepare_node.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/response_utils.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/sandbox_node.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_supervisor/delegation.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_supervisor/nodes/execute_agent_tool.py create mode 100644 src/cuga/backend/cuga_graph/nodes/cuga_supervisor/nodes/prepare_agents_and_prompt.py diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/graph_nodes.py 
b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/graph_nodes.py index dbe0cf0a..ed64ad95 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/graph_nodes.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/graph_nodes.py @@ -33,7 +33,7 @@ class CoreGraphAdapter(ABC): messages_key: str #: Approval seams — defaults are exactly the legacy Lite values so the - #: adapter-ized ToolApprovalHandler stays byte-identical for Lite. + #: shared ToolApprovalHandler stays byte-identical for Lite. metadata_key: str = "cuga_lite_metadata" execute_node_name: str = "sandbox" sender_name: str = "CugaLite" diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/shared_nodes.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/shared_nodes.py index d72836e7..5b5080ba 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/shared_nodes.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/graph/shared_nodes.py @@ -36,7 +36,7 @@ CoreGraphAdapter, enforce_step_limit, ) -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.tool_approval_handler import ToolApprovalHandler from cuga.backend.cuga_graph.utils.context_management_utils import apply_context_summarization @@ -180,14 +180,6 @@ async def call_model(state: Any, config: RunnableConfig = None) -> Command: adapter.on_response_processed(state, code, content) - # ── Tool-approval interrupt for generated code ───────────────────── - if code and settings.policy.enabled: - approval_command = await ToolApprovalHandler.check_and_create_approval_interrupt( - adapter, state, code, content, config - ) - if approval_command: - return approval_command - # ── Build final message list + step count ────────────────────────── final_messages: list = modified_messages + [AIMessage(content=content)] new_step_count: int = state.step_count + 1 @@ -204,6 +196,14 @@ async def 
call_model(state: Any, config: RunnableConfig = None) -> Command: if limit_cmd is not None: return limit_cmd + # ── Tool-approval interrupt for generated code ───────────────────── + if code and settings.policy.enabled: + approval_command = await ToolApprovalHandler.check_and_create_approval_interrupt( + adapter, state, code, content, config + ) + if approval_command: + return approval_command + # ── Metadata update ──────────────────────────────────────────────── meta_value = adapter.build_metadata_update(state, playbook_fired=playbook_fired) meta_update = {adapter.metadata_key: meta_value} diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/policy/tool_approval_handler.py similarity index 83% rename from src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py rename to src/cuga/backend/cuga_graph/nodes/cuga_agent_core/policy/tool_approval_handler.py index 5f334a1b..3d4814f0 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tool_approval_handler.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/policy/tool_approval_handler.py @@ -1,16 +1,14 @@ -""" -Tool Approval Handler for CugaLite subgraph. +"""Tool approval detection, interruption, and resumption for agent graphs. -Handles the detection, interruption, and resumption of tool approval flows. +Shared by CugaLite and CugaSupervisor via :class:`CoreGraphAdapter` seams. 
""" from typing import Any, List, Optional -from loguru import logger from langchain_core.messages import AIMessage - -from langgraph.types import Command from langgraph.graph import END +from langgraph.types import Command +from loguru import logger from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( CoreGraphAdapter, @@ -46,7 +44,6 @@ def extract_approved_code(adapter: CoreGraphAdapter, state: Any) -> Optional[str extract_and_combine_codeblocks, ) - # Find the last AI message last_ai_message = None for msg in reversed(adapter.get_messages(state)): if msg.type == "ai": @@ -56,7 +53,6 @@ def extract_approved_code(adapter: CoreGraphAdapter, state: Any) -> Optional[str if not last_ai_message or not last_ai_message.content: return None - # Extract code from the message code = extract_and_combine_codeblocks(last_ai_message.content) if code: logger.info(f"Extracted approved code from last AI message: {len(code)} chars") @@ -66,17 +62,7 @@ def extract_approved_code(adapter: CoreGraphAdapter, state: Any) -> Optional[str @staticmethod def clean_approval_metadata(metadata: dict) -> dict: - """ - Clean approval-related fields from metadata. - - Removes temporary approval fields to avoid interference with future executions. 
- - Args: - metadata: Current metadata dictionary - - Returns: - Cleaned metadata dictionary - """ + """Remove temporary approval fields from metadata.""" fields_to_remove = [ "approval_required", "user_approved", @@ -93,7 +79,6 @@ def handle_approval_resumption(adapter: CoreGraphAdapter, state: Any) -> Optiona """Handle resumption after user approval: run the approved code.""" logger.info("Returning from tool approval - skipping code generation, executing approved code") - # Extract code from last AI message code = ToolApprovalHandler.extract_approved_code(adapter, state) if not code: @@ -105,10 +90,8 @@ def handle_approval_resumption(adapter: CoreGraphAdapter, state: Any) -> Optiona state.step_count, ) - # Clean approval metadata cleaned_metadata = ToolApprovalHandler.clean_approval_metadata(adapter.get_metadata(state)) - # Route to the graph's execute node with approved code return Command( goto=adapter.execute_node_name, update={ @@ -132,15 +115,12 @@ async def check_and_create_approval_interrupt( try: logger.debug(f"Checking if code requires tool approval (code length: {len(code)} chars)") - # Get policy system from config policy_system = PolicyConfigurable.from_config(config or {}) logger.debug(f"Got policy system: {policy_system}") - # Create context from state context = PolicyConfigurable.create_context_from_state(state, config or {}) logger.debug(f"Created context with user_input: '{context.user_input}'") - # Check if any ToolApproval policies apply to this code policy_match = await policy_system.agent.check_tool_approval_for_code(code, context) logger.debug(f"Policy match result: {policy_match}") @@ -154,7 +134,6 @@ async def check_and_create_approval_interrupt( code_lines = code.split("\n") preview_lines = code_lines - # Store policy metadata for the approval flow approval_metadata = { **adapter.get_metadata(state), "policy_type": "tool_approval", @@ -167,10 +146,8 @@ async def check_and_create_approval_interrupt( "show_code_preview": 
policy.show_code_preview, } - # Update state metadata temporarily for the interrupt creation adapter.set_metadata(state, approval_metadata) - # Create the approval interrupt return ToolApprovalHandler._create_approval_interrupt( adapter, state, code, content, preview_lines ) @@ -192,7 +169,6 @@ def _create_approval_interrupt( md = adapter.get_metadata(state) - # Create approval request metadata approval_metadata = { **md, "approval_required": True, @@ -200,13 +176,11 @@ def _create_approval_interrupt( "full_code": code if md.get("show_code_preview") else None, } - # Extract policy details policy_name = md.get("policy_name", "Tool Approval") approval_msg = md.get("approval_message", "This tool requires your approval before execution.") tools_list = md.get("required_tools", []) apps_list = md.get("required_apps", []) - # Create HITL action for tool approval hitl_action = create_tool_approval_action( policy_name=policy_name, required_tools=tools_list, @@ -215,7 +189,6 @@ def _create_approval_interrupt( approval_message=approval_msg, ) - # Generate user-friendly markdown message final_answer_text = ToolApprovalHandler._generate_approval_message( policy_name=policy_name, approval_msg=approval_msg, @@ -224,23 +197,21 @@ def _create_approval_interrupt( preview_lines=preview_lines, ) - # Update messages updated_messages, error_message = _core_append_with_step_limit( adapter, state, [AIMessage(content=content)] ) if error_message: return _core_create_error_command(adapter, updated_messages, error_message, state.step_count) - # Return command to exit subgraph and route to parent's SuggestHumanActions -> WaitForResponse return Command( - goto=END, # Exit subgraph to parent callback node + goto=END, update={ adapter.messages_key: updated_messages, "script": code, "final_answer": final_answer_text, adapter.metadata_key: approval_metadata, - "hitl_action": hitl_action, # Set HITL action for parent to detect - "sender": adapter.sender_name, # Mark sender for return routing + 
"hitl_action": hitl_action, + "sender": adapter.sender_name, "step_count": state.step_count + 1, }, ) @@ -253,19 +224,7 @@ def _generate_approval_message( apps_list: List[str], preview_lines: List[str], ) -> str: - """ - Generate user-friendly markdown message for approval request. - - Args: - policy_name: Name of the policy - approval_msg: Approval message from policy - tools_list: List of tools requiring approval - apps_list: List of apps requiring approval - preview_lines: Code preview lines - - Returns: - Formatted markdown string - """ + """Generate user-friendly markdown message for approval request.""" content_lines = [f"## ✋ {policy_name}", "", approval_msg, ""] if tools_list: diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_tool_approval_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_tool_approval_adapter.py index f30f9d7f..fbb9d2ab 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_tool_approval_adapter.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_agent_core/tests/policy/test_tool_approval_adapter.py @@ -15,7 +15,7 @@ from langgraph.graph import END from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import CoreGraphAdapter -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.tool_approval_handler import ToolApprovalHandler class LiteLikeAdapter(CoreGraphAdapter): diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/__init__.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/__init__.py new file mode 100644 index 00000000..7ae288db --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/__init__.py @@ -0,0 +1,5 @@ +"""CugaLite graph adapter package.""" + +from cuga.backend.cuga_graph.nodes.cuga_lite.adapter.graph_adapter import AgentGraphAdapter + +__all__ = ["AgentGraphAdapter"] diff --git 
a/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/graph_adapter.py new file mode 100644 index 00000000..d14ff598 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/graph_adapter.py @@ -0,0 +1,172 @@ +"""AgentGraphAdapter — CoreGraphAdapter implementation for CugaLite (single-agent). + +Defines graph seams and call_model hook overrides. Prompt, tool, and execution +logic live in ``prepare_node.py`` and ``sandbox_node.py``. +""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Tuple + +from langchain_core.messages import BaseMessage +from loguru import logger + +from cuga.backend.activity_tracker.tracker import Step +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.todos import ( + format_current_plan_section, + format_task_todos_system_block, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_lite.adapter.prepare_node import create_prepare_tools_and_apps_node +from cuga.backend.cuga_graph.nodes.cuga_lite.adapter.response_utils import ( + clean_empty_response_retry_meta, + extract_code_from_response_tool_calls, +) +from cuga.backend.cuga_graph.nodes.cuga_lite.adapter.sandbox_node import create_sandbox_node +from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.bind_tools import resolve_model_with_bind_tools +from cuga.backend.cuga_graph.nodes.cuga_lite.nl_auto_continue_classifier import ( + classify_nl_auto_continue, + normalize_assistant_text, +) +from cuga.backend.llm.errors import extract_code_from_tool_use_failed +from cuga.config import settings + + +class AgentGraphAdapter(CoreGraphAdapter): + """CoreGraphAdapter implementation for the CugaLite single-agent graph.""" + + messages_key: str = "chat_messages" + execute_node_name: str = "sandbox" + metadata_key: str = "cuga_lite_metadata" + sender_name: str = "CugaLite" + + def __init__( 
+ self, + *, + tracker: Any, + base_callbacks: Optional[List[Any]], + task_todos_ref: List[Dict[str, str]], + tools_context_ref: Optional[Dict[str, Any]], + base_tool_provider: Any, + model: Any = None, + prompt_template: Any = None, + instructions: Any = None, + special_instructions: Any = None, + tools_context: Optional[Dict[str, Any]] = None, + static_prompt: Any = None, + thread_id: Any = None, + ) -> None: + self._tracker = tracker + self._base_callbacks = base_callbacks or [] + self._task_todos_ref = task_todos_ref + self._tools_context_ref = tools_context_ref + self._base_tool_provider = base_tool_provider + self._model = model + self._prompt_template = prompt_template + self._instructions = instructions + self._special_instructions = special_instructions + self._tools_context = tools_context if tools_context is not None else {} + self._static_prompt = static_prompt + self._thread_id = thread_id + + def get_messages(self, state: Any) -> List[BaseMessage]: + return list(state.chat_messages or []) + + def resolve_max_steps(self, state: Any, override: Optional[int]) -> int: + if override is not None: + return override + return ( + state.cuga_lite_max_steps + if getattr(state, "cuga_lite_max_steps", None) is not None + else getattr(settings.advanced_features, "cuga_lite_max_steps", 50) + ) + + def get_few_shot_messages(self, state: Any) -> List[Any]: + return list(state.mcp_few_shot_messages or []) + + def get_pi(self, state: Any) -> Optional[str]: + return getattr(state, "pi", None) + + def prepare_system_content(self, state: Any, configurable: dict, base_prompt: str) -> str: + if self._task_todos_ref: + return base_prompt + format_task_todos_system_block(self._task_todos_ref) + task_todos = getattr(state, "task_todos", None) + if task_todos: + return base_prompt + format_current_plan_section(task_todos) + return base_prompt + + def get_tracker(self) -> Any: + return self._tracker + + def get_invoke_config(self, configurable: dict) -> dict: + callbacks = 
configurable.get("callbacks", self._base_callbacks) + return {"callbacks": callbacks} + + async def ainvoke_model(self, bound: Any, messages: list, invoke_config: dict) -> Any: + try: + return await bound.ainvoke(messages, config=invoke_config) + except Exception as exc: + code = extract_code_from_tool_use_failed(exc) + if code: + logger.warning( + "Model attempted tool call without tools bound (tool_use_failed). " + "Using generated code in sandbox" + ) + + class _FakeResponse: + content = f"```python\n{code}\n```" + additional_kwargs: dict = {} + + return _FakeResponse() + raise + + async def resolve_bind_tools(self, state: Any, active_model: Any, configurable: dict) -> Any: + try: + return await resolve_model_with_bind_tools( + active_model, + configurable=configurable, + tools_context_ref=self._tools_context_ref, + tool_provider=self._base_tool_provider, + ) + except Exception as exc: + logger.warning("AgentGraphAdapter.resolve_bind_tools failed: %s", exc) + return None + + def normalize_response(self, response: Any) -> Tuple[str, Optional[str]]: + content = normalize_assistant_text(response.content) + if not content: + tool_code = extract_code_from_response_tool_calls(response) + if tool_code: + logger.warning("Empty content with tool_calls detected; recovering tool call as Python code") + content = tool_code + reasoning = normalize_assistant_text( + (getattr(response, "additional_kwargs", None) or {}).get("reasoning_content") + ) + return content, reasoning + + def on_response_processed(self, state: Any, code: Optional[str], content: str) -> None: + try: + self._tracker.collect_step(step=Step(name="Raw_Assistant_Response", data=content)) + if code: + self._tracker.collect_step(step=Step(name="Assistant_code", data=content)) + else: + self._tracker.collect_step(step=Step(name="Assistant_nl", data=content)) + except Exception as exc: + logger.debug("AgentGraphAdapter.on_response_processed tracker error: %s", exc) + + def build_metadata_update(self, state: Any, 
*, playbook_fired: bool) -> dict: + meta = clean_empty_response_retry_meta(self.get_metadata(state)) + if playbook_fired: + return {**meta, "playbook_guidance_added": True} + return meta + + async def classify_auto_continue( + self, state: Any, model: Any, content: str, reasoning: Optional[str] + ) -> bool: + return await classify_nl_auto_continue(model, content, reasoning) + + def build_prepare_node(self, lc_bind_tools_meta: dict): + return create_prepare_tools_and_apps_node(self, lc_bind_tools_meta) + + def build_sandbox_node(self, base_thread_id: Any, base_apps_list: Any): + return create_sandbox_node(self, base_thread_id, base_apps_list) diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/prepare_node.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/prepare_node.py new file mode 100644 index 00000000..8cbe9619 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/prepare_node.py @@ -0,0 +1,636 @@ +"""Prepare node for the CugaLite agent graph.""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import Any, Callable, Optional + +from langchain_core.runnables import RunnableConfig +from langgraph.types import Command +from loguru import logger + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import make_tool_awaitable +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.todos import create_update_todos_tool +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ( + ExecutionRouter, + split_execution_note, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.tool_approval_handler import ToolApprovalHandler +from cuga.backend.cuga_graph.nodes.cuga_agent_core.tools.runtime_tools import ( + build_runtime_tools, + resolve_runtime_backends, +) +from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.find_tools import ( + _ensure_web_app, + _first_user_message_text, + _load_default_find_tools_few_shot_examples, + 
_web_search_enabled, + create_find_tools_tool, +) +from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.knowledge import ( + _get_knowledge_tool_scope_context, + _knowledge_scope_instruction, +) +from cuga.backend.cuga_graph.nodes.cuga_lite.model_runtime_profile import resolved_runtime_model_name +from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import ( + create_mcp_prompt, + format_apps_for_prompt, + normalize_mcp_few_shot_examples, + resolve_cuga_lite_few_shots_enabled, +) +from cuga.backend.cuga_graph.nodes.task_decomposition_planning.analyze_task import TaskAnalyzer +from cuga.backend.cuga_graph.policy.enactment import PolicyEnactment +from cuga.backend.skills import ( + SkillRegistry, + create_skill_tools, + discover_skills, + format_available_skills_block, +) +from cuga.config import settings + + +def create_prepare_tools_and_apps_node(adapter: Any, lc_bind_tools_meta: dict) -> Callable: + async def prepare_tools_and_apps(state: Any, config: Optional[RunnableConfig] = None) -> Command: + """Prepare tools, apps, and prompt once at the start of the graph. + + This node gets tools from tool_provider, filters based on state configuration, + determines if find_tools should be enabled, and prepares the prompt. + Tools are available via closure (per graph instance), prompt is stored in state. + + enable_todos is read from config["configurable"] at runtime. + + Optional configurable key ``mcp_few_shot_examples``: overrides few-shots—a JSON string or + list of dicts with ``role`` and ``content``. If absent (or explicitly ``None``) and + ``find_tools`` is enabled, ``prompts/find_tools_few_shot_examples.json`` (bundled next to the + MCP template) is loaded, with optional fallback to repo ``samples/cuga_lite/mcp_few_shot_examples.json``. + Bundled few-shots only apply when ``find_tools`` shortlisting is active + (``total_tool_count > shortlisting_tool_threshold``, see settings configurable). 
+ + Disable few-shots entirely via ``advanced_features.cuga_lite_enable_few_shots`` in settings.toml + or ``cuga_lite_enable_few_shots`` in configurable (skips prefix chat few-shots). + """ + configurable = config.get("configurable", {}) if config else {} + enable_todos = ( + configurable.get("enable_todos") + if "enable_todos" in configurable + else settings.advanced_features.enable_todos + ) + shortlisting_threshold = ( + configurable.get("shortlisting_tool_threshold") + if "shortlisting_tool_threshold" in configurable + else settings.advanced_features.shortlisting_tool_threshold + ) + _runtime_model_name = resolved_runtime_model_name( + configurable_llm=configurable.get("llm"), + graph_default_model=adapter._model, + ) + few_shots_enabled = resolve_cuga_lite_few_shots_enabled( + configurable, + model_name=_runtime_model_name, + ) + logger.debug( + f"[APPROVAL DEBUG] prepare_tools_and_apps received cuga_lite_metadata: {state.cuga_lite_metadata}" + ) + + # Skip policy checking if policies are disabled or if we're returning from approval + if settings.policy.enabled and not ToolApprovalHandler.should_skip_policy_check(adapter, state): + # Check for policies and enact if matched + # Include IntentGuard, Playbook, and ToolGuide for intent checks + from cuga.backend.cuga_graph.policy.models import PolicyType + + command, metadata = await PolicyEnactment.check_and_enact( + state, + config, + policy_types=[PolicyType.INTENT_GUARD, PolicyType.PLAYBOOK, PolicyType.TOOL_GUIDE], + adapter=adapter, + ) + + # If policy returned a command (e.g., BLOCK_INTENT), execute it immediately + if command: + return command + + # If policy returned metadata (e.g., playbook guidance), store it + if metadata: + adapter.set_metadata(state, metadata) + elif not settings.policy.enabled: + logger.debug("Policy system disabled - skipping policy checks") + else: + logger.info("[APPROVAL DEBUG] Skipping policy check - user has already approved") + + if not adapter._base_tool_provider: + raise 
ValueError("tool_provider is required") + + # Get total tool count across ALL apps (for shortlisting threshold - not per app) + all_tools_total = await adapter._base_tool_provider.get_all_tools() + total_tool_count = len(all_tools_total) if all_tools_total else 0 + + # Get tools from provider + apps_for_prompt = None + app_to_tools_map = {} + + # Get apps from state and filter tools if specific app is selected + if state.sub_task_app: + # Specific app selected - filter tools to only this app + all_apps = await adapter._base_tool_provider.get_apps() + # Also allow any apps listed in advanced_features.force_lite_mode_apps alongside the selected app + force_lite_apps = getattr(settings.advanced_features, 'force_lite_mode_apps', []) + if force_lite_apps: + allowed_apps_names = list(set([state.sub_task_app] + force_lite_apps)) + if _web_search_enabled(): + allowed_apps_names.append("web") + # call authenticate_apps for the allowed apps + if settings.advanced_features.benchmark == "appworld": + await TaskAnalyzer.call_authenticate_apps(force_lite_apps) + apps_for_prompt = [app for app in all_apps if app.name in allowed_apps_names] + else: + apps_for_prompt = [app for app in all_apps if app.name == state.sub_task_app] + apps_for_prompt = _ensure_web_app(apps_for_prompt, all_apps) + # Get only tools for this specific app + tools_for_execution = [] + for app in apps_for_prompt: + current_tools_for_execution = await adapter._base_tool_provider.get_tools(app.name) + app_to_tools_map[app.name] = current_tools_for_execution + tools_for_execution.extend(current_tools_for_execution) + + logger.info( + f"Filtered to {len(tools_for_execution)} tools for {len(apps_for_prompt)} identified apps" + ) + elif state.api_intent_relevant_apps: + # Filter to API apps + all_apps = await adapter._base_tool_provider.get_apps() + apps_for_prompt = [ + app for app in state.api_intent_relevant_apps if hasattr(app, 'type') and app.type == 'api' + ] + apps_for_prompt = _ensure_web_app(apps_for_prompt, all_apps) + # Get tools only for the identified apps +
tools_for_execution = [] + for app in apps_for_prompt: + app_tools = await adapter._base_tool_provider.get_tools(app.name) + app_to_tools_map[app.name] = app_tools + tools_for_execution.extend(app_tools) + logger.info( + f"Filtered to {len(tools_for_execution)} tools for {len(apps_for_prompt)} identified apps" + ) + else: + # Get all tools and apps + all_apps = await adapter._base_tool_provider.get_apps() + apps_for_prompt = all_apps + tools_for_execution = all_tools_total or [] + # Build mapping for all apps + for app in apps_for_prompt: + app_tools = await adapter._base_tool_provider.get_tools(app.name) + app_to_tools_map[app.name] = app_tools + + enable_find_tools = total_tool_count > shortlisting_threshold or _web_search_enabled() + + if enable_find_tools: + logger.info( + f"Auto-enabling find_tools: total {total_tool_count} tools (across all apps) exceeds threshold of {shortlisting_threshold}" + ) + + # Prepare prompt + is_autonomous_subtask = state.sub_task is not None and state.sub_task.strip() != "" + + # TODO: Add support for tasks loaded from a file (this happens when a file is loaded as a playbook) + task_loaded_from_file = False # Not used in current flow + + # Prepare tools for prompt - if find_tools enabled, only expose find_tools + tools_for_prompt = tools_for_execution + if enable_find_tools: + active_model = configurable.get("llm") + find_tool = await create_find_tools_tool( + all_tools=tools_for_execution, + all_apps=apps_for_prompt, + app_to_tools_map=app_to_tools_map, + llm=active_model, + initial_user_message=_first_user_message_text(state.chat_messages), + ) + tools_for_prompt = [find_tool] + # Add find_tools to tools context for sandbox execution + # Wrap to make awaitable (agent always uses await) + # Prefer coroutine over func to avoid run_in_executor issues + find_tool_func = ( + find_tool.coroutine + if hasattr(find_tool, 'coroutine') and find_tool.coroutine + else find_tool.func + ) + adapter._tools_context['find_tools'] =
make_tool_awaitable(find_tool_func) + if lc_bind_tools_meta is not None: + lc_bind_tools_meta["_lc_bind_tools_find_tools"] = find_tool + logger.info( + "Exposing only find_tools in prompt (all tools + find_tools available in execution context)" + ) + + if few_shots_enabled: + if "mcp_few_shot_examples" in configurable: + raw_fs = configurable["mcp_few_shot_examples"] + if raw_fs is not None: + few_shot_examples = normalize_mcp_few_shot_examples(raw_fs) + elif enable_find_tools: + few_shot_examples = _load_default_find_tools_few_shot_examples() + else: + few_shot_examples = [] + elif enable_find_tools: + few_shot_examples = _load_default_find_tools_few_shot_examples() + else: + few_shot_examples = [] + logger.debug( + "Bundled MCP few-shots (prompts/find_tools_few_shot_examples.json) not loaded: find_tools " + "is off " + f"(total_tool_count={total_tool_count} <= shortlisting_tool_threshold=" + f"{shortlisting_threshold}). Lower the threshold via configurable or add apps/tools." + ) + else: + few_shot_examples = [] + logger.debug("MCP few-shots disabled (cuga_lite_enable_few_shots=false)") + if few_shot_examples: + logger.debug(f"MCP few-shot examples: {len(few_shot_examples)} turns") + + # Add create_update_todos tool for complex task management if enabled + if enable_todos: + todos_tool = await create_update_todos_tool( + agent_state=state, todos_store_ref=adapter._task_todos_ref + ) + tools_for_prompt.append(todos_tool) + # Add to tools context for sandbox execution + # Prefer coroutine over func to avoid run_in_executor issues + todos_tool_func = ( + todos_tool.coroutine + if hasattr(todos_tool, 'coroutine') and todos_tool.coroutine + else todos_tool.func + ) + adapter._tools_context['create_update_todos'] = make_tool_awaitable(todos_tool_func) + + # Apply tool guide if guides exist in metadata and haven't been applied yet + # Guides should apply regardless of whether a playbook matched + if settings.policy.enabled and state.cuga_lite_metadata: + # Check if 
guides exist (either as separate guides list or legacy format) + has_guides = ( + state.cuga_lite_metadata.get("guides") + or state.cuga_lite_metadata.get("guide_content") + or state.cuga_lite_metadata.get("policy_type") == "tool_guide" + or state.cuga_lite_metadata.get("has_guides", False) + ) + + if has_guides: + tools_for_execution = PolicyEnactment.apply_tool_guide( + tools_for_execution, state.cuga_lite_metadata + ) + tools_for_prompt = PolicyEnactment.apply_tool_guide( + tools_for_prompt, state.cuga_lite_metadata + ) + # Mark guides as applied to prevent re-application + state.cuga_lite_metadata["guides_applied"] = True + logger.info("Applied tool guide from policy") + else: + logger.debug("No tool guides found in metadata") + + skill_tools = [] + skills_prompt_section = "" + skills_enabled = False + configurable_special = ( + (config or {}).get("configurable", {}).get("special_instructions") if config else None + ) + effective_special = adapter._special_instructions or configurable_special or "" + skills_cfg_on = getattr(settings.skills, "enabled", False) + cuga_folder_for_skills = os.getenv("CUGA_FOLDER", settings.policy.cuga_folder) + if skills_cfg_on: + skill_entries = discover_skills(cuga_folder_for_skills) + if skill_entries: + skill_registry = SkillRegistry(skill_entries) + skill_tools = create_skill_tools(skill_registry) + tools_for_prompt.extend(skill_tools) + skills_prompt_section = format_available_skills_block(skill_registry) + skills_enabled = True + logger.info( + f"Loaded {len(skill_entries)} agent skill(s) from .agents/skills and " + f"~/.config/agents/skills with legacy {cuga_folder_for_skills}/skills and " + "~/.config/cuga/skills fallbacks" + ) + + # Resolve thread_id early for per-thread workspace selection. + _cfg_for_thread = config.get("configurable", {}) if config else {} + _runtime_thread_id_for_fs = _cfg_for_thread.get("thread_id") or state.thread_id or adapter._thread_id + + # Update tools context with all execution tools. 
+ # Wrap to make awaitable (agent always uses await). Filesystem path + # rewriting is no longer needed here — filesystem tools come from + # the consolidated runtime class below, not from MCP. + for tool in tools_for_execution: + # Extract tool function - StructuredTool may use .func, .coroutine, or ._run + # IMPORTANT: Prefer coroutine over func to avoid run_in_executor issues + # with tools that have async implementations (like MCP tools) + tool_func = None + if hasattr(tool, 'coroutine') and tool.coroutine: + # Prefer async coroutine - avoids run_in_executor timeout issues + tool_func = tool.coroutine + elif hasattr(tool, 'func') and tool.func: + tool_func = tool.func + else: + tool_func = getattr(tool, '_run', None) + + if tool_func: + adapter._tools_context[tool.name] = make_tool_awaitable(tool_func) + else: + logger.warning(f"Tool '{tool.name}' has no callable function, skipping") + + for tool in skill_tools: + tool_func = None + if hasattr(tool, "coroutine") and tool.coroutine: + tool_func = tool.coroutine + elif hasattr(tool, "func") and tool.func: + tool_func = tool.func + else: + tool_func = getattr(tool, "_run", None) + if tool_func: + adapter._tools_context[tool.name] = make_tool_awaitable(tool_func) + else: + logger.warning(f"Skill tool '{tool.name}' has no callable, skipping") + + # Inject the consolidated filesystem tools + run_command via the + # shared runtime_tools orchestrator. Backend selection and gating + # live in cuga_agent_core (behavior-identical to the previous + # inline block); filesystem and run_command remain independently + # gated by enable_filesystem_tools / enable_shell_tool. 
+ _runtime_backends = resolve_runtime_backends(settings, configurable) + + if _runtime_backends.filesystem != "none" or _runtime_backends.shell != "none": + cfg = config.get("configurable", {}) if config else {} + runtime_thread_id = ( + cfg["thread_id"] if "thread_id" in cfg else (state.thread_id or adapter._thread_id) + ) + else: + runtime_thread_id = None + + _runtime_bundle = build_runtime_tools(thread_id=runtime_thread_id, backends=_runtime_backends) + adapter._tools_context.update(_runtime_bundle.execution_callables) + tools_for_prompt.extend(_runtime_bundle.prompt_tools) + if _runtime_bundle.app_definitions and apps_for_prompt is not None: + apps_for_prompt = list(apps_for_prompt) + _runtime_bundle.app_definitions + + from cuga.backend.evolve.memory import build_evolve_special_instructions_extension + + special_instructions_final = effective_special or "" + _split_note = split_execution_note(ExecutionRouter.resolve(settings)) + if _split_note: + special_instructions_final = (special_instructions_final + "\n\n" + _split_note).strip() + evolve_extension = await build_evolve_special_instructions_extension( + state=state, + configurable=configurable, + timeout=settings.evolve.timeout, + ) + if evolve_extension: + special_instructions_final = (special_instructions_final or "") + evolve_extension + + cfg = config.get("configurable", {}) if config else {} + _thread_id = cfg.get("thread_id") or "" + _knowledge_engine = cfg.get("knowledge_engine") + if _knowledge_engine is None: + try: + from cuga.backend.server.main import app as _app + + _app_state = getattr(_app.state, "app_state", None) + _knowledge_engine = getattr(_app_state, "knowledge_engine", None) if _app_state else None + except Exception: + _knowledge_engine = None + + allowed_knowledge_scopes, default_knowledge_scope = _get_knowledge_tool_scope_context( + _knowledge_engine, + _thread_id or None, + ) + + knowledge_tool_names = { + tool.name for tool in tools_for_execution if getattr(tool, "name", 
"").startswith("knowledge_") + } + + if knowledge_tool_names and not allowed_knowledge_scopes: + tools_for_execution = [ + tool for tool in tools_for_execution if getattr(tool, "name", "") not in knowledge_tool_names + ] + tools_for_prompt = [ + tool for tool in tools_for_prompt if getattr(tool, "name", "") not in knowledge_tool_names + ] + apps_for_prompt = [ + app for app in (apps_for_prompt or []) if getattr(app, "name", "") != "knowledge" + ] + for tool_name in knowledge_tool_names: + adapter._tools_context.pop(tool_name, None) + elif knowledge_tool_names: + if _thread_id: + logger.debug("Knowledge tools: thread context available for session scope injection") + + def _wrap_knowledge_tool(fn, tid, allowed_scopes, default_scope): + async def _wrapped(*args, **kwargs): + scope = kwargs.get("scope") + if scope is None and default_scope: + kwargs["scope"] = default_scope + scope = default_scope + if scope is not None and scope not in allowed_scopes: + allowed_text = ", ".join(allowed_scopes) + return { + "error": ( + f"Knowledge scope '{scope}' is unavailable in this context. " + f"Allowed scopes: {allowed_text}" + ) + } + if tid and "session" in allowed_scopes: + kwargs.setdefault("thread_id", tid) + return await fn(*args, **kwargs) + + _wrapped.__doc__ = getattr(fn, "__doc__", None) + _wrapped._knowledge_allowed_scopes = allowed_scopes + _wrapped._knowledge_default_scope = default_scope + _wrapped._knowledge_thread_id = tid + return _wrapped + + for tool_name in knowledge_tool_names: + original_fn = adapter._tools_context.get(tool_name) + if original_fn: + adapter._tools_context[tool_name] = _wrap_knowledge_tool( + original_fn, + _thread_id, + allowed_knowledge_scopes, + default_knowledge_scope, + ) + + # Note: scope rules are injected once via effective_instructions. + # No per-tool decoration needed — avoids repeated text in prompt. 
+ + # Inject knowledge base awareness if knowledge tools are available + effective_instructions = adapter._instructions + # Detect knowledge tools — works for both registry (app named + # "knowledge") and SDK mode (tools under "runtime_tools") + has_knowledge_tools = any(getattr(app, "name", "") == "knowledge" for app in (apps_for_prompt or [])) + if not has_knowledge_tools and tools_for_execution: + has_knowledge_tools = any( + getattr(t, "name", "").startswith("knowledge_") for t in tools_for_execution + ) + knowledge_scope_instruction = _knowledge_scope_instruction( + allowed_knowledge_scopes, + _thread_id or None, + ) + if knowledge_tool_names: + effective_instructions = ( + f"{knowledge_scope_instruction}\n\n{effective_instructions}" + if effective_instructions + else knowledge_scope_instruction + ) + if has_knowledge_tools: + try: + from cuga.backend.knowledge.awareness import ( + get_knowledge_summary, + format_knowledge_context, + get_engine_from_app_state, + ) + + cfg = config.get("configurable", {}) + engine = cfg.get("knowledge_engine") or get_engine_from_app_state() + # Get agent_id: configurable > app_state > fallback + agent_id = cfg.get("agent_id") + knowledge_config_hash = cfg.get("knowledge_config_hash") + if not agent_id: + try: + from cuga.backend.server.main import app as _app + + _as = getattr(_app.state, "app_state", None) + agent_id = getattr(_as, "agent_id", None) if _as else None + if knowledge_config_hash is None: + knowledge_config_hash = ( + getattr(_as, "knowledge_config_hash", None) if _as else None + ) + except Exception: + pass + if not agent_id: + agent_id = "cuga-default" + awareness_thread_id = cfg.get("thread_id") + kb_ctx = format_knowledge_context( + agent_id, + awareness_thread_id, + engine=engine, + agent_config_hash=knowledge_config_hash, + ) + logger.info( + f"Knowledge awareness: agent_id={agent_id}, thread_id={awareness_thread_id}, " + f"agent_collection={kb_ctx.get('agent_collection')}, " + 
f"session_collection={kb_ctx.get('session_collection')}" + ) + + if not engine: + logger.warning("Knowledge awareness skipped: engine not available") + else: + # Use draft knowledge config for search-time params when running + # in draft mode (Try-It-Out). Published agent always uses engine config. + _search_cfg = engine._config + _is_draft = agent_id and agent_id.endswith("--draft") + if _is_draft: + try: + from cuga.backend.server.main import app as _app + + _das = getattr(_app.state, "draft_app_state", None) + _draft_kc = getattr(_das, "draft_knowledge_config", None) if _das else None + if _draft_kc: + _search_cfg = _draft_kc + except Exception: + pass + knowledge_block = await get_knowledge_summary( + engine, + agent_collection=kb_ctx.get("agent_collection"), + session_collection=kb_ctx.get("session_collection"), + max_search_attempts=getattr(_search_cfg, "max_search_attempts", None) + or getattr(engine._config, "max_search_attempts", None), + default_limit=getattr(_search_cfg, "default_limit", None) + or getattr(engine._config, "default_limit", None), + rag_profile=getattr(_search_cfg, "rag_profile", None) + or getattr(engine._config, "rag_profile", "standard"), + ) + if knowledge_block: + # Load knowledge search instructions from dedicated file + knowledge_instructions_text = "" + try: + kb_instructions_path = ( + Path(__file__).resolve().parents[5] + / "configurations" + / "knowledge" + / "knowledge_instructions.md" + ) + if kb_instructions_path.exists(): + knowledge_instructions_text = kb_instructions_path.read_text( + encoding="utf-8" + ).strip() + except Exception as ki_err: + logger.debug(f"Failed to load knowledge instructions: {ki_err}") + + # Prepend knowledge block BEFORE other instructions + # so the LLM sees it early and acts on it + effective_instructions = ( + f"{knowledge_block}\n\n{knowledge_instructions_text}\n\n{effective_instructions}" + if effective_instructions + else f"{knowledge_block}\n\n{knowledge_instructions_text}" + ) + 
logger.info(f"Knowledge awareness injected: {len(knowledge_block)} chars") + except Exception as e: + logger.debug(f"Knowledge awareness injection skipped: {e}") + if lc_bind_tools_meta is not None: + lc_bind_tools_meta["_lc_bind_tools_overlay_structured_tools"] = [ + t for t in (tools_for_prompt or []) if getattr(t, "name", None) + ] + + # Create prompt dynamically + dynamic_prompt = adapter._static_prompt + + if not dynamic_prompt: + dynamic_prompt = create_mcp_prompt( + tools_for_prompt, + allow_user_clarification=True, + return_to_user_cases=None, + instructions=effective_instructions, + apps=apps_for_prompt, + task_loaded_from_file=task_loaded_from_file, + is_autonomous_subtask=settings.advanced_features.force_autonomous_mode + or is_autonomous_subtask, + prompt_template=adapter._prompt_template, + enable_find_tools=enable_find_tools, + enable_todos=enable_todos, + special_instructions=special_instructions_final, + skills_enabled=skills_enabled, + skills_prompt_section=skills_prompt_section, + enable_shell_tool=getattr(settings.advanced_features, "enable_shell_tool", False), + has_knowledge=has_knowledge_tools, + few_shot_examples=few_shot_examples, + few_shots_enabled=few_shots_enabled, + ) + logger.info( + "Prepared CugaLite prompt: enable_find_tools={} few_shot_message_turns={} " + "few_shots_as_messages={} prompt_chars={}", + enable_find_tools, + len(few_shot_examples), + bool(few_shot_examples), + len(dynamic_prompt), + ) + else: + logger.info( + "Using static CugaLite prompt; dynamic few-shot injection skipped " + "(enable_find_tools={} few_shot_turns={})", + enable_find_tools, + len(few_shot_examples), + ) + + reflection_apps_snapshot = format_apps_for_prompt(apps_for_prompt or []) + + return Command( + goto="call_model", + update={ + "tools_prepared": True, + "prepared_prompt": dynamic_prompt, + "step_count": 0, + "cuga_lite_metadata": state.cuga_lite_metadata, + "reflection_apps": reflection_apps_snapshot, + "reflection_enable_find_tools": 
enable_find_tools, + "reflection_skills_enabled": skills_enabled, + "reflection_skills_prompt_section": skills_prompt_section, + "mcp_few_shot_messages": few_shot_examples, + }, + ) + + return prepare_tools_and_apps diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/response_utils.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/response_utils.py new file mode 100644 index 00000000..ee007154 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/response_utils.py @@ -0,0 +1,63 @@ +"""Response and metadata helpers for the Lite graph adapter.""" + +from __future__ import annotations + +import json +from typing import Any, Dict, Optional + +from langchain_core.messages import HumanMessage + + +def clean_empty_response_retry_meta(meta: Optional[Dict[str, Any]]) -> Dict[str, Any]: + cleaned = {**(meta or {})} + cleaned.pop("_empty_response_correction", None) + return cleaned + + +def reflection_current_task(state: Any) -> str: + """Prefer ``sub_task``; else last user message that is not sandbox feedback.""" + if (state.sub_task or "").strip(): + return state.sub_task.strip() + if state.chat_messages: + execution_prefix = "Execution output:" + for msg in reversed(state.chat_messages): + if isinstance(msg, HumanMessage): + content = (msg.content or "").strip() + if content and not content.startswith(execution_prefix): + return content + return "" + + +def tool_call_kwarg_literal(value: Any) -> str: + if isinstance(value, str): + return json.dumps(value, ensure_ascii=False) + return repr(value) + + +def extract_code_from_response_tool_calls(response: Any) -> Optional[str]: + """Recover fenced Python from AIMessage.tool_calls when content is empty.""" + tool_calls = getattr(response, "tool_calls", None) or ( + getattr(response, "additional_kwargs", None) or {} + ).get("tool_calls") + if not tool_calls: + return None + + tool_call = tool_calls[0] + if not isinstance(tool_call, dict): + return None + + name = tool_call.get("name") or 
(tool_call.get("function") or {}).get("name") + args = tool_call.get("args") or (tool_call.get("function") or {}).get("arguments") or {} + if isinstance(args, str): + try: + args = json.loads(args) + except json.JSONDecodeError: + args = {} + + if not name: + return None + + args_str = ", ".join( + f"{k}={tool_call_kwarg_literal(v)}" for k, v in (args if isinstance(args, dict) else {}).items() + ) + return f"```python\nresult = await {name}({args_str})\nprint(result)\n```" diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/sandbox_node.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/sandbox_node.py new file mode 100644 index 00000000..859b6cf2 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/adapter/sandbox_node.py @@ -0,0 +1,231 @@ +"""Sandbox execute node for the CugaLite agent graph.""" + +from __future__ import annotations + +import json +from typing import Any, Callable, Optional + +from langchain_core.messages import AIMessage, HumanMessage +from langchain_core.runnables import RunnableConfig +from loguru import logger + +from cuga.backend.activity_tracker.tracker import Step +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.todos import extract_task_todos_from_new_vars +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( + append_chat_messages_with_step_limit as core_append_with_step_limit, + create_error_command as core_create_error_command, + execution_output_text, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ExecutionRouter +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.tool_approval_handler import ToolApprovalHandler +from cuga.backend.cuga_graph.nodes.cuga_lite.adapter.response_utils import reflection_current_task +from cuga.backend.cuga_graph.nodes.cuga_lite.executors.code_executor import ( + CodeExecutor, + is_find_tools_listing_markdown, +) +from cuga.backend.cuga_graph.nodes.cuga_lite.reflection.reflection import 
reflection_task +from cuga.backend.llm.models import LLMManager +from cuga.config import settings + +_llm_manager = LLMManager() + + +def create_sandbox_node(adapter: Any, base_thread_id: Any, base_apps_list: Any) -> Callable: + async def sandbox(state: Any, config: Optional[RunnableConfig] = None): + """Execute code in sandbox and return results.""" + from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import ToolCallTracker + + # Check if user denied approval (only if policies are enabled) + if settings.policy.enabled: + denial_command = ToolApprovalHandler.handle_denial(adapter, state) + if denial_command: + return denial_command + + configurable = config.get("configurable", {}) if config else {} + max_steps = configurable.get("cuga_lite_max_steps") if "cuga_lite_max_steps" in configurable else None + if "thread_id" in configurable: + current_thread_id = configurable["thread_id"] + else: + current_thread_id = state.thread_id or base_thread_id + current_apps_list = configurable.get("apps_list", base_apps_list) + track_tool_calls = configurable.get("track_tool_calls", False) + reflection_enabled = ( + configurable.get("reflection_enabled") + if "reflection_enabled" in configurable + else settings.advanced_features.reflection_enabled + ) + + # Get existing variables using CugaLiteState's own variables_manager + existing_vars = {} + for var_name in list(state.variables_manager.get_variable_names()): + var_value = state.variables_manager.get_variable(var_name) + if is_find_tools_listing_markdown(var_value): + state.variables_manager.remove_variable(var_name) + continue + existing_vars[var_name] = var_value + + # Add tools to context + context = {**existing_vars, **adapter._tools_context} + + # Start tool call tracking (only if enabled via invoke parameter) + ToolCallTracker.start_tracking(enabled=track_tool_calls) + + try: + # Execute the script - pass the CugaLiteState itself since it has variables_manager + _exec_plan = ExecutionRouter.resolve(settings) + 
if _exec_plan.split_execution_active: + logger.info( + "Split execution: python=%s shell=%s fs=%s", + _exec_plan.python_backend, + _exec_plan.shell_backend, + _exec_plan.filesystem_backend, + ) + output, new_vars = await CodeExecutor.eval_with_tools_async( + code=state.script, + _locals=context, + state=state, # Pass CugaLiteState - it has variables_manager property + thread_id=current_thread_id, + apps_list=current_apps_list, + plan=_exec_plan, + ) + + adapter._tracker.collect_step(step=Step(name="User_output", data=output)) + adapter._tracker.collect_step( + step=Step( + name="User_output_variables", + data=json.dumps( + new_vars, + default=lambda o: o.model_dump() if hasattr(o, "model_dump") else str(o), + ), + ) + ) + + # Output is already formatted and trimmed by code_executor + logger.debug(f"\n\n------\n\n📝 Execution output:\n\n{output}\n\n------\n\n") + + # Update variables using CugaLiteState's variables_manager + # This automatically updates state.variables_storage + for name, value in new_vars.items(): + if is_find_tools_listing_markdown(value): + continue + state.variables_manager.add_variable( + value, name=name, description="Created during code execution" + ) + + reflection_output = "" + if reflection_enabled: + try: + active_model = configurable.get("llm") or _llm_manager.get_model( + settings.agent.planner.model + ) + reflection_agent = reflection_task(llm=active_model) + # Format chat messages as history string + agent_history_parts = [] + for msg in state.chat_messages: + if isinstance(msg, HumanMessage): + agent_history_parts.append(f"User: {msg.content}") + elif isinstance(msg, AIMessage): + agent_history_parts.append(f"Assistant: {msg.content}") + else: + agent_history_parts.append( + f"{type(msg).__name__}: {getattr(msg, 'content', str(msg))}" + ) + agent_history = ( + "\n".join(agent_history_parts) + if agent_history_parts + else "No previous conversation history" + ) + reflection_result = await reflection_agent.ainvoke( + { + "instructions": 
"", + "current_task": reflection_current_task(state) or "(no task text)", + "agent_history": agent_history, + "coder_agent_output": output, + "apps": state.reflection_apps or [], + "enable_find_tools": state.reflection_enable_find_tools, + "skills_enabled": state.reflection_skills_enabled, + "skills_prompt_section": state.reflection_skills_prompt_section, + "force_autonomous_mode": settings.advanced_features.force_autonomous_mode, + } + ) + reflection_output = reflection_result.content + logger.debug(f"Reflection output:\n{reflection_output}") + except Exception as e: + logger.warning(f"Reflection failed: {e}") + reflection_output = "" + + # Output is already formatted by code_executor + execution_message_content = execution_output_text(output) + if reflection_output: + execution_message_content = ( + f"{execution_message_content}\n\n---\n\nSummary:\n{reflection_output}" + ) + + adapter._tracker.collect_step( + step=Step( + name="User_return", + data=execution_message_content, + ) + ) + + new_message = HumanMessage(content=execution_message_content) + updated_messages, error_message = core_append_with_step_limit( + adapter, state, [new_message], max_steps + ) + + # Collect tool calls from this execution + execution_tool_calls = ToolCallTracker.stop_tracking() + accumulated_tool_calls = (state.tool_calls or []) + execution_tool_calls + + if error_message: + return core_create_error_command( + adapter, + updated_messages, + error_message, + state.step_count, + additional_updates={ + "variables_storage": state.variables_storage, + "variable_counter_state": state.variable_counter_state, + "variable_creation_order": state.variable_creation_order, + "tool_calls": accumulated_tool_calls, + }, + ) + + todo_state_update = extract_task_todos_from_new_vars(new_vars) + base_update = { + "chat_messages": updated_messages, + "variables_storage": state.variables_storage, + "variable_counter_state": state.variable_counter_state, + "variable_creation_order": 
state.variable_creation_order, + "step_count": state.step_count + 1, + "tool_calls": accumulated_tool_calls, + } + if todo_state_update is not None: + base_update["task_todos"] = todo_state_update + return base_update + except Exception as e: + # Collect tool calls even on error + execution_tool_calls = ToolCallTracker.stop_tracking() + accumulated_tool_calls = (state.tool_calls or []) + execution_tool_calls + + error_msg = f"Error during execution: {str(e)}" + logger.error(error_msg) + new_message = HumanMessage(content=error_msg) + updated_messages, limit_error_message = core_append_with_step_limit( + adapter, state, [new_message], max_steps + ) + + if limit_error_message: + return core_create_error_command( + adapter, updated_messages, limit_error_message, state.step_count + ) + + return { + "chat_messages": updated_messages, + "error": error_msg, + "execution_complete": True, + "step_count": state.step_count + 1, + "tool_calls": accumulated_tool_calls, + } + + return sandbox diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py index 55a71e63..39b733c7 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/agent_graph_adapter.py @@ -1,1103 +1,8 @@ -"""AgentGraphAdapter — CoreGraphAdapter implementation for CugaLite (single-agent). +"""Backward-compatible re-export of AgentGraphAdapter. 
-Provides all hook overrides that the shared ``create_call_model_node`` factory -delegates to for Lite-specific behaviour: - -- Few-shot messages, PI injection, todos system block -- normalize_response: normalize_assistant_text + tool-call code recovery -- Tracker side-effects, Langfuse callbacks -- Metadata cleanup (_clean_empty_response_retry_meta) -- NL auto-continue via classify_nl_auto_continue - -Also houses the format_task_todos_system_block / format_current_plan_section -helpers that were previously defined in cuga_lite_graph.py; cuga_lite_graph.py -imports them from here in Phase 6. +Implementation lives in ``cuga_lite.adapter`` (graph_adapter, prepare_node, sandbox_node). """ -from __future__ import annotations - -import json -import os -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage -from langchain_core.runnables import RunnableConfig -from langgraph.types import Command -from loguru import logger - -from cuga.backend.activity_tracker.tracker import Step -from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( - CoreGraphAdapter, - append_chat_messages_with_step_limit as _core_append_with_step_limit, - create_error_command as _core_create_error_command, - execution_output_text, -) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import make_tool_awaitable -from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ( - ExecutionRouter, - split_execution_note, -) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.tools.runtime_tools import ( - build_runtime_tools, - resolve_runtime_backends, -) -from cuga.backend.cuga_graph.nodes.cuga_lite.executors.code_executor import ( - CodeExecutor, - is_find_tools_listing_markdown, -) -from cuga.backend.cuga_graph.nodes.cuga_lite.model_runtime_profile import ( - resolved_runtime_model_name, -) -from 
cuga.backend.cuga_graph.nodes.cuga_lite.nl_auto_continue_classifier import ( - classify_nl_auto_continue, - normalize_assistant_text, -) -from cuga.backend.cuga_graph.nodes.cuga_lite.prompt_utils import ( - create_mcp_prompt, - format_apps_for_prompt, - normalize_mcp_few_shot_examples, - resolve_cuga_lite_few_shots_enabled, -) -from cuga.backend.cuga_graph.nodes.cuga_lite.reflection.reflection import reflection_task -from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler -from cuga.backend.cuga_graph.nodes.task_decomposition_planning.analyze_task import TaskAnalyzer -from cuga.backend.cuga_graph.policy.enactment import PolicyEnactment -from cuga.backend.llm.errors import extract_code_from_tool_use_failed -from cuga.backend.llm.models import LLMManager -from cuga.backend.skills import ( - SkillRegistry, - create_skill_tools, - discover_skills, - format_available_skills_block, -) -from cuga.config import settings - -# ── Helpers (imported from cuga_lite/helpers/) ───────────────────────────── - -from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.bind_tools import ( - resolve_model_with_bind_tools, -) -from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.find_tools import ( - _first_user_message_text, - create_find_tools_tool, - _load_default_find_tools_few_shot_examples, - _ensure_web_app, - _web_search_enabled, -) -from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.knowledge import ( - _get_knowledge_tool_scope_context, - _knowledge_scope_instruction, -) -from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.todos import ( - create_update_todos_tool, - extract_task_todos_from_new_vars, - format_current_plan_section, - format_task_todos_system_block, -) - -_llm_manager = LLMManager() - - -def _clean_empty_response_retry_meta(meta: Optional[Dict[str, Any]]) -> Dict[str, Any]: - m = {**(meta or {})} - m.pop("_empty_response_correction", None) - return m - - -def _reflection_current_task(state: Any) -> str: - """Prefer 
``sub_task``; else last user message that is not sandbox ``Execution output`` feedback.""" - if (state.sub_task or "").strip(): - return state.sub_task.strip() - if state.chat_messages: - execution_prefix = "Execution output:" - for msg in reversed(state.chat_messages): - if isinstance(msg, HumanMessage): - c = (msg.content or "").strip() - if c and not c.startswith(execution_prefix): - return c - return "" - - -def _tool_call_kwarg_literal(value: Any) -> str: - if isinstance(value, str): - return json.dumps(value, ensure_ascii=False) - return repr(value) - - -def _extract_code_from_response_tool_calls(response: Any) -> Optional[str]: - """Recover fenced Python from AIMessage.tool_calls when content is empty.""" - tool_calls = getattr(response, "tool_calls", None) or ( - getattr(response, "additional_kwargs", None) or {} - ).get("tool_calls") - if not tool_calls: - return None - - tc = tool_calls[0] - if not isinstance(tc, dict): - return None - - name = tc.get("name") or (tc.get("function") or {}).get("name") - args = tc.get("args") or (tc.get("function") or {}).get("arguments") or {} - if isinstance(args, str): - try: - args = json.loads(args) - except json.JSONDecodeError: - args = {} - - if not name: - return None - - args_str = ", ".join( - f"{k}={_tool_call_kwarg_literal(v)}" for k, v in (args if isinstance(args, dict) else {}).items() - ) - return f"```python\nresult = await {name}({args_str})\nprint(result)\n```" - - -# ── AgentGraphAdapter ────────────────────────────────────────────────────── - - -class AgentGraphAdapter(CoreGraphAdapter): - """CoreGraphAdapter implementation for the CugaLite single-agent graph. - - Overrides all call_model hooks that differ from the no-op defaults: - few-shot messages, PI, todos, normalize, tracker, callbacks, metadata - cleanup, and NL auto-continue. 
- """ - - messages_key: str = "chat_messages" - execute_node_name: str = "sandbox" - metadata_key: str = "cuga_lite_metadata" - sender_name: str = "CugaLite" - - def __init__( - self, - *, - tracker: Any, - base_callbacks: Optional[List[Any]], - task_todos_ref: List[Dict[str, str]], - tools_context_ref: Optional[Dict[str, Any]], - base_tool_provider: Any, - model: Any = None, - prompt_template: Any = None, - instructions: Any = None, - special_instructions: Any = None, - tools_context: Optional[Dict[str, Any]] = None, - static_prompt: Any = None, - thread_id: Any = None, - ) -> None: - self._tracker = tracker - self._base_callbacks = base_callbacks or [] - self._task_todos_ref = task_todos_ref - self._tools_context_ref = tools_context_ref - self._base_tool_provider = base_tool_provider - self._model = model - self._prompt_template = prompt_template - self._instructions = instructions - self._special_instructions = special_instructions - self._tools_context = tools_context if tools_context is not None else {} - self._static_prompt = static_prompt - self._thread_id = thread_id - - # ── Abstract method implementations ─────────────────────────────────── - - def get_messages(self, state: Any) -> List[BaseMessage]: - return list(state.chat_messages or []) - - def resolve_max_steps(self, state: Any, override: Optional[int]) -> int: - if override is not None: - return override - return ( - state.cuga_lite_max_steps - if getattr(state, "cuga_lite_max_steps", None) is not None - else getattr(settings.advanced_features, "cuga_lite_max_steps", 50) - ) - - # ── Pre-invocation hook overrides ───────────────────────────────────── - - def get_few_shot_messages(self, state: Any) -> List[Any]: - return list(state.mcp_few_shot_messages or []) - - def get_pi(self, state: Any) -> Optional[str]: - return getattr(state, "pi", None) - - def prepare_system_content(self, state: Any, configurable: dict, base_prompt: str) -> str: - if self._task_todos_ref: - return base_prompt + 
format_task_todos_system_block(self._task_todos_ref) - task_todos = getattr(state, "task_todos", None) - if task_todos: - return base_prompt + format_current_plan_section(task_todos) - return base_prompt - - def get_tracker(self) -> Any: - return self._tracker - - def get_invoke_config(self, configurable: dict) -> dict: - callbacks = configurable.get("callbacks", self._base_callbacks) - return {"callbacks": callbacks} - - async def ainvoke_model(self, bound: Any, messages: list, invoke_config: dict) -> Any: - try: - return await bound.ainvoke(messages, config=invoke_config) - except Exception as exc: - code = extract_code_from_tool_use_failed(exc) - if code: - logger.warning( - "Model attempted tool call without tools bound (tool_use_failed). " - "Using generated code in sandbox" - ) - - class _FakeResponse: - content = f"```python\n{code}\n```" - additional_kwargs: dict = {} - - return _FakeResponse() - raise - - async def resolve_bind_tools(self, state: Any, active_model: Any, configurable: dict) -> Any: - try: - return await resolve_model_with_bind_tools( - active_model, - configurable=configurable, - tools_context_ref=self._tools_context_ref, - tool_provider=self._base_tool_provider, - ) - except Exception as exc: - logger.warning("AgentGraphAdapter.resolve_bind_tools failed: %s", exc) - return None - - # ── Post-invocation hook overrides ──────────────────────────────────── - - def normalize_response(self, response: Any) -> Tuple[str, Optional[str]]: - content = normalize_assistant_text(response.content) - if not content: - tool_code = _extract_code_from_response_tool_calls(response) - if tool_code: - logger.warning("Empty content with tool_calls detected; recovering tool call as Python code") - content = tool_code - reasoning = normalize_assistant_text( - (getattr(response, "additional_kwargs", None) or {}).get("reasoning_content") - ) - return content, reasoning - - def on_response_processed(self, state: Any, code: Optional[str], content: str) -> None: - 
try: - self._tracker.collect_step(step=Step(name="Raw_Assistant_Response", data=content)) - if code: - self._tracker.collect_step(step=Step(name="Assistant_code", data=content)) - else: - self._tracker.collect_step(step=Step(name="Assistant_nl", data=content)) - except Exception as exc: - logger.debug("AgentGraphAdapter.on_response_processed tracker error: %s", exc) - - def build_metadata_update(self, state: Any, *, playbook_fired: bool) -> dict: - meta = _clean_empty_response_retry_meta(self.get_metadata(state)) - if playbook_fired: - return {**meta, "playbook_guidance_added": True} - return meta - - async def classify_auto_continue( - self, state: Any, model: Any, content: str, reasoning: Optional[str] - ) -> bool: - return await classify_nl_auto_continue(model, content, reasoning) - - # ── Node factory methods (Tasks 4c and 5b) ──────────────────────────── - - def build_prepare_node(self, lc_bind_tools_meta: dict): - """Return the prepare_tools_and_apps async node.""" - - async def prepare_tools_and_apps(state: Any, config: Optional[RunnableConfig] = None) -> Command: - """Prepare tools, apps, and prompt once at the start of the graph. - - This node gets tools from tool_provider, filters based on state configuration, - determines if find_tools should be enabled, and prepares the prompt. - Tools are available via closure (per graph instance), prompt is stored in state. - - enable_todos is read from config["configurable"] at runtime. - - Optional configurable key ``mcp_few_shot_examples``: overrides few-shots—a JSON string or - list of dicts with ``role`` and ``content``. If absent (or explicitly ``None``) and - ``find_tools`` is enabled, ``prompts/find_tools_few_shot_examples.json`` (bundled next to the - MCP template) is loaded, with optional fallback to repo ``samples/cuga_lite/mcp_few_shot_examples.json``. - Bundled few-shots only apply when ``find_tools`` shortlisting is active - (``total_tool_count > shortlisting_tool_threshold``, see settings configurable). 
- - Disable few-shots entirely via ``advanced_features.cuga_lite_enable_few_shots`` in settings.toml - or ``cuga_lite_enable_few_shots`` in configurable (skips prefix chat few-shots). - """ - configurable = config.get("configurable", {}) if config else {} - enable_todos = ( - configurable.get("enable_todos") - if "enable_todos" in configurable - else settings.advanced_features.enable_todos - ) - shortlisting_threshold = ( - configurable.get("shortlisting_tool_threshold") - if "shortlisting_tool_threshold" in configurable - else settings.advanced_features.shortlisting_tool_threshold - ) - _runtime_model_name = resolved_runtime_model_name( - configurable_llm=configurable.get("llm"), - graph_default_model=self._model, - ) - few_shots_enabled = resolve_cuga_lite_few_shots_enabled( - configurable, - model_name=_runtime_model_name, - ) - logger.debug( - f"[APPROVAL DEBUG] prepare_tools_and_apps received cuga_lite_metadata: {state.cuga_lite_metadata}" - ) - - # Skip policy checking if policies are disabled or if we're returning from approval - if settings.policy.enabled and not ToolApprovalHandler.should_skip_policy_check(self, state): - # Check for policies and enact if matched - # Include IntentGuard, Playbook, and ToolGuide for intent checks - from cuga.backend.cuga_graph.policy.models import PolicyType - - command, metadata = await PolicyEnactment.check_and_enact( - state, - config, - policy_types=[PolicyType.INTENT_GUARD, PolicyType.PLAYBOOK, PolicyType.TOOL_GUIDE], - adapter=self, - ) - - # If policy returned a command (e.g., BLOCK_INTENT), execute it immediately - if command: - return command - - # If policy returned metadata (e.g., playbook guidance), store it - if metadata: - self.set_metadata(state, metadata) - elif not settings.policy.enabled: - logger.debug("Policy system disabled - skipping policy checks") - else: - logger.info("[APPROVAL DEBUG] Skipping policy check - user has already approved") - - if not self._base_tool_provider: - raise 
ValueError("tool_provider is required") - - # Get total tool count across ALL apps (for shortlisting threshold - not per app) - all_tools_total = await self._base_tool_provider.get_all_tools() - total_tool_count = len(all_tools_total) if all_tools_total else 0 - - # Get tools from provider - apps_for_prompt = None - app_to_tools_map = {} - - # Get apps from state and filter tools if specific app is selected - if state.sub_task_app: - # Specific app selected - filter tools to only this app - all_apps = await self._base_tool_provider.get_apps() - # add here the implementation of force_ - force_lite_apps = getattr(settings.advanced_features, 'force_lite_mode_apps', []) - if force_lite_apps: - allowed_apps_names = list(set([state.sub_task_app] + force_lite_apps)) - if _web_search_enabled(): - allowed_apps_names.append("web") - # call authenticate_apps for the allowed apps - if settings.advanced_features.benchmark == "appworld": - await TaskAnalyzer.call_authenticate_apps(force_lite_apps) - apps_for_prompt = [app for app in all_apps if app.name in allowed_apps_names] - else: - apps_for_prompt = [app for app in all_apps if app.name == state.sub_task_app] - apps_for_prompt = _ensure_web_app(apps_for_prompt, all_apps) - # Get only tools for this specific app - tools_for_execution = [] - for app in apps_for_prompt: - current_tools_for_execution = await self._base_tool_provider.get_tools(app.name) - app_to_tools_map[app.name] = current_tools_for_execution - tools_for_execution.extend(current_tools_for_execution) - - logger.info( - f"Filtered to {len(tools_for_execution)} tools for {len(apps_for_prompt)} identified apps" - ) - elif state.api_intent_relevant_apps: - # Filter to API apps - all_apps = await self._base_tool_provider.get_apps() - apps_for_prompt = [ - app - for app in state.api_intent_relevant_apps - if hasattr(app, 'type') and app.type == 'api' - ] - apps_for_prompt = _ensure_web_app(apps_for_prompt, all_apps) - # Get tools only for the identified apps - 
tools_for_execution = [] - for app in apps_for_prompt: - app_tools = await self._base_tool_provider.get_tools(app.name) - app_to_tools_map[app.name] = app_tools - tools_for_execution.extend(app_tools) - logger.info( - f"Filtered to {len(tools_for_execution)} tools for {len(apps_for_prompt)} identified apps" - ) - else: - # Get all tools and apps - all_apps = await self._base_tool_provider.get_apps() - apps_for_prompt = all_apps - tools_for_execution = all_tools_total or [] - # Build mapping for all apps - for app in apps_for_prompt: - app_tools = await self._base_tool_provider.get_tools(app.name) - app_to_tools_map[app.name] = app_tools - - enable_find_tools = total_tool_count > shortlisting_threshold or _web_search_enabled() - - if enable_find_tools: - logger.info( - f"Auto-enabling find_tools: total {total_tool_count} tools (across all apps) exceeds threshold of {shortlisting_threshold}" - ) - - # Prepare prompt - is_autonomous_subtask = state.sub_task is not None and state.sub_task.strip() != "" - - # TODO: Add task loaded from file support this happens when we load file as playboook - task_loaded_from_file = False # Not used in current flow - - # Prepare tools for prompt - if find_tools enabled, only expose find_tools - tools_for_prompt = tools_for_execution - if enable_find_tools: - active_model = configurable.get("llm") - find_tool = await create_find_tools_tool( - all_tools=tools_for_execution, - all_apps=apps_for_prompt, - app_to_tools_map=app_to_tools_map, - llm=active_model, - initial_user_message=_first_user_message_text(state.chat_messages), - ) - tools_for_prompt = [find_tool] - # Add find_tools to tools context for sandbox execution - # Wrap to make awaitable (agent always uses await) - # Prefer coroutine over func to avoid run_in_executor issues - find_tool_func = ( - find_tool.coroutine - if hasattr(find_tool, 'coroutine') and find_tool.coroutine - else find_tool.func - ) - self._tools_context['find_tools'] = make_tool_awaitable(find_tool_func) - if 
lc_bind_tools_meta is not None: - lc_bind_tools_meta["_lc_bind_tools_find_tools"] = find_tool - logger.info( - "Exposing only find_tools in prompt (all tools + find_tools available in execution context)" - ) - - if few_shots_enabled: - if "mcp_few_shot_examples" in configurable: - raw_fs = configurable["mcp_few_shot_examples"] - if raw_fs is not None: - few_shot_examples = normalize_mcp_few_shot_examples(raw_fs) - elif enable_find_tools: - few_shot_examples = _load_default_find_tools_few_shot_examples() - else: - few_shot_examples = [] - elif enable_find_tools: - few_shot_examples = _load_default_find_tools_few_shot_examples() - else: - few_shot_examples = [] - logger.debug( - "Bundled MCP few-shots (prompts/find_tools_few_shot_examples.json) not loaded: find_tools " - "is off " - f"(total_tool_count={total_tool_count} <= shortlisting_tool_threshold=" - f"{shortlisting_threshold}). Lower the threshold via configurable or add apps/tools." - ) - else: - few_shot_examples = [] - logger.debug("MCP few-shots disabled (cuga_lite_enable_few_shots=false)") - if few_shot_examples: - logger.debug(f"MCP few-shot examples: {len(few_shot_examples)} turns") - - # Add create_update_todos tool for complex task management if enabled - if enable_todos: - todos_tool = await create_update_todos_tool( - agent_state=state, todos_store_ref=self._task_todos_ref - ) - tools_for_prompt.append(todos_tool) - # Add to tools context for sandbox execution - # Prefer coroutine over func to avoid run_in_executor issues - todos_tool_func = ( - todos_tool.coroutine - if hasattr(todos_tool, 'coroutine') and todos_tool.coroutine - else todos_tool.func - ) - self._tools_context['create_update_todos'] = make_tool_awaitable(todos_tool_func) - - # Apply tool guide if guides exist in metadata and haven't been applied yet - # Guides should apply regardless of whether a playbook matched - if settings.policy.enabled and state.cuga_lite_metadata: - # Check if guides exist (either as separate guides list or 
legacy format) - has_guides = ( - state.cuga_lite_metadata.get("guides") - or state.cuga_lite_metadata.get("guide_content") - or state.cuga_lite_metadata.get("policy_type") == "tool_guide" - or state.cuga_lite_metadata.get("has_guides", False) - ) - - if has_guides: - tools_for_execution = PolicyEnactment.apply_tool_guide( - tools_for_execution, state.cuga_lite_metadata - ) - tools_for_prompt = PolicyEnactment.apply_tool_guide( - tools_for_prompt, state.cuga_lite_metadata - ) - # Mark guides as applied to prevent re-application - state.cuga_lite_metadata["guides_applied"] = True - logger.info("Applied tool guide from policy") - else: - logger.debug("No tool guides found in metadata") - - skill_tools = [] - skills_prompt_section = "" - skills_enabled = False - configurable_special = ( - (config or {}).get("configurable", {}).get("special_instructions") if config else None - ) - effective_special = self._special_instructions or configurable_special or "" - skills_cfg_on = getattr(settings.skills, "enabled", False) - cuga_folder_for_skills = os.getenv("CUGA_FOLDER", settings.policy.cuga_folder) - if skills_cfg_on: - skill_entries = discover_skills(cuga_folder_for_skills) - if skill_entries: - skill_registry = SkillRegistry(skill_entries) - skill_tools = create_skill_tools(skill_registry) - tools_for_prompt.extend(skill_tools) - skills_prompt_section = format_available_skills_block(skill_registry) - skills_enabled = True - logger.info( - f"Loaded {len(skill_entries)} agent skill(s) from .agents/skills and " - f"~/.config/agents/skills with legacy {cuga_folder_for_skills}/skills and " - "~/.config/cuga/skills fallbacks" - ) - - # Resolve thread_id early for per-thread workspace selection. - _cfg_for_thread = config.get("configurable", {}) if config else {} - _runtime_thread_id_for_fs = _cfg_for_thread.get("thread_id") or state.thread_id or self._thread_id - - # Update tools context with all execution tools. - # Wrap to make awaitable (agent always uses await). 
Filesystem path - # rewriting is no longer needed here — filesystem tools come from - # the consolidated runtime class below, not from MCP. - for tool in tools_for_execution: - # Extract tool function - StructuredTool may use .func, .coroutine, or ._run - # IMPORTANT: Prefer coroutine over func to avoid run_in_executor issues - # with tools that have async implementations (like MCP tools) - tool_func = None - if hasattr(tool, 'coroutine') and tool.coroutine: - # Prefer async coroutine - avoids run_in_executor timeout issues - tool_func = tool.coroutine - elif hasattr(tool, 'func') and tool.func: - tool_func = tool.func - else: - tool_func = getattr(tool, '_run', None) - - if tool_func: - self._tools_context[tool.name] = make_tool_awaitable(tool_func) - else: - logger.warning(f"Tool '{tool.name}' has no callable function, skipping") - - for tool in skill_tools: - tool_func = None - if hasattr(tool, "coroutine") and tool.coroutine: - tool_func = tool.coroutine - elif hasattr(tool, "func") and tool.func: - tool_func = tool.func - else: - tool_func = getattr(tool, "_run", None) - if tool_func: - self._tools_context[tool.name] = make_tool_awaitable(tool_func) - else: - logger.warning(f"Skill tool '{tool.name}' has no callable, skipping") - - # Inject the consolidated filesystem tools + run_command via the - # shared runtime_tools orchestrator. Backend selection and gating - # live in cuga_agent_core (behavior-identical to the previous - # inline block); filesystem and run_command remain independently - # gated by enable_filesystem_tools / enable_shell_tool. 
- _runtime_backends = resolve_runtime_backends(settings, configurable) - - if _runtime_backends.filesystem != "none" or _runtime_backends.shell != "none": - cfg = config.get("configurable", {}) if config else {} - runtime_thread_id = ( - cfg["thread_id"] if "thread_id" in cfg else (state.thread_id or self._thread_id) - ) - else: - runtime_thread_id = None - - _runtime_bundle = build_runtime_tools(thread_id=runtime_thread_id, backends=_runtime_backends) - self._tools_context.update(_runtime_bundle.execution_callables) - tools_for_prompt.extend(_runtime_bundle.prompt_tools) - if _runtime_bundle.app_definitions and apps_for_prompt is not None: - apps_for_prompt = list(apps_for_prompt) + _runtime_bundle.app_definitions - - from cuga.backend.evolve.memory import build_evolve_special_instructions_extension - - special_instructions_final = effective_special or "" - _split_note = split_execution_note(ExecutionRouter.resolve(settings)) - if _split_note: - special_instructions_final = (special_instructions_final + "\n\n" + _split_note).strip() - evolve_extension = await build_evolve_special_instructions_extension( - state=state, - configurable=configurable, - timeout=settings.evolve.timeout, - ) - if evolve_extension: - special_instructions_final = (special_instructions_final or "") + evolve_extension - - cfg = config.get("configurable", {}) if config else {} - _thread_id = cfg.get("thread_id") or "" - _knowledge_engine = cfg.get("knowledge_engine") - if _knowledge_engine is None: - try: - from cuga.backend.server.main import app as _app - - _app_state = getattr(_app.state, "app_state", None) - _knowledge_engine = getattr(_app_state, "knowledge_engine", None) if _app_state else None - except Exception: - _knowledge_engine = None - - allowed_knowledge_scopes, default_knowledge_scope = _get_knowledge_tool_scope_context( - _knowledge_engine, - _thread_id or None, - ) - - knowledge_tool_names = { - tool.name - for tool in tools_for_execution - if getattr(tool, "name", 
"").startswith("knowledge_") - } - - if knowledge_tool_names and not allowed_knowledge_scopes: - tools_for_execution = [ - tool - for tool in tools_for_execution - if getattr(tool, "name", "") not in knowledge_tool_names - ] - tools_for_prompt = [ - tool for tool in tools_for_prompt if getattr(tool, "name", "") not in knowledge_tool_names - ] - apps_for_prompt = [ - app for app in (apps_for_prompt or []) if getattr(app, "name", "") != "knowledge" - ] - for tool_name in knowledge_tool_names: - self._tools_context.pop(tool_name, None) - elif knowledge_tool_names: - if _thread_id: - logger.debug("Knowledge tools: thread context available for session scope injection") - - def _wrap_knowledge_tool(fn, tid, allowed_scopes, default_scope): - async def _wrapped(*args, **kwargs): - scope = kwargs.get("scope") - if scope is None and default_scope: - kwargs["scope"] = default_scope - scope = default_scope - if scope is not None and scope not in allowed_scopes: - allowed_text = ", ".join(allowed_scopes) - return { - "error": ( - f"Knowledge scope '{scope}' is unavailable in this context. " - f"Allowed scopes: {allowed_text}" - ) - } - if tid and "session" in allowed_scopes: - kwargs.setdefault("thread_id", tid) - return await fn(*args, **kwargs) - - _wrapped.__doc__ = getattr(fn, "__doc__", None) - _wrapped._knowledge_allowed_scopes = allowed_scopes - _wrapped._knowledge_default_scope = default_scope - _wrapped._knowledge_thread_id = tid - return _wrapped - - for tool_name in knowledge_tool_names: - original_fn = self._tools_context.get(tool_name) - if original_fn: - self._tools_context[tool_name] = _wrap_knowledge_tool( - original_fn, - _thread_id, - allowed_knowledge_scopes, - default_knowledge_scope, - ) - - # Note: scope rules are injected once via effective_instructions. - # No per-tool decoration needed — avoids repeated text in prompt. 
- - # Inject knowledge base awareness if knowledge tools are available - effective_instructions = self._instructions - # Detect knowledge tools — works for both registry (app named - # "knowledge") and SDK mode (tools under "runtime_tools") - has_knowledge_tools = any( - getattr(app, "name", "") == "knowledge" for app in (apps_for_prompt or []) - ) - if not has_knowledge_tools and tools_for_execution: - has_knowledge_tools = any( - getattr(t, "name", "").startswith("knowledge_") for t in tools_for_execution - ) - knowledge_scope_instruction = _knowledge_scope_instruction( - allowed_knowledge_scopes, - _thread_id or None, - ) - if knowledge_tool_names: - effective_instructions = ( - f"{knowledge_scope_instruction}\n\n{effective_instructions}" - if effective_instructions - else knowledge_scope_instruction - ) - if has_knowledge_tools: - try: - from cuga.backend.knowledge.awareness import ( - get_knowledge_summary, - format_knowledge_context, - get_engine_from_app_state, - ) - - cfg = config.get("configurable", {}) - engine = cfg.get("knowledge_engine") or get_engine_from_app_state() - # Get agent_id: configurable > app_state > fallback - agent_id = cfg.get("agent_id") - knowledge_config_hash = cfg.get("knowledge_config_hash") - if not agent_id: - try: - from cuga.backend.server.main import app as _app - - _as = getattr(_app.state, "app_state", None) - agent_id = getattr(_as, "agent_id", None) if _as else None - if knowledge_config_hash is None: - knowledge_config_hash = ( - getattr(_as, "knowledge_config_hash", None) if _as else None - ) - except Exception: - pass - if not agent_id: - agent_id = "cuga-default" - awareness_thread_id = cfg.get("thread_id") - kb_ctx = format_knowledge_context( - agent_id, - awareness_thread_id, - engine=engine, - agent_config_hash=knowledge_config_hash, - ) - logger.info( - f"Knowledge awareness: agent_id={agent_id}, thread_id={awareness_thread_id}, " - f"agent_collection={kb_ctx.get('agent_collection')}, " - 
f"session_collection={kb_ctx.get('session_collection')}" - ) - - if not engine: - logger.warning("Knowledge awareness skipped: engine not available") - else: - # Use draft knowledge config for search-time params when running - # in draft mode (Try-It-Out). Published agent always uses engine config. - _search_cfg = engine._config - _is_draft = agent_id and agent_id.endswith("--draft") - if _is_draft: - try: - from cuga.backend.server.main import app as _app - - _das = getattr(_app.state, "draft_app_state", None) - _draft_kc = getattr(_das, "draft_knowledge_config", None) if _das else None - if _draft_kc: - _search_cfg = _draft_kc - except Exception: - pass - knowledge_block = await get_knowledge_summary( - engine, - agent_collection=kb_ctx.get("agent_collection"), - session_collection=kb_ctx.get("session_collection"), - max_search_attempts=getattr(_search_cfg, "max_search_attempts", None) - or getattr(engine._config, "max_search_attempts", None), - default_limit=getattr(_search_cfg, "default_limit", None) - or getattr(engine._config, "default_limit", None), - rag_profile=getattr(_search_cfg, "rag_profile", None) - or getattr(engine._config, "rag_profile", "standard"), - ) - if knowledge_block: - # Load knowledge search instructions from dedicated file - knowledge_instructions_text = "" - try: - kb_instructions_path = ( - Path(__file__).parents[4] - / "configurations" - / "knowledge" - / "knowledge_instructions.md" - ) - if kb_instructions_path.exists(): - knowledge_instructions_text = kb_instructions_path.read_text( - encoding="utf-8" - ).strip() - except Exception as ki_err: - logger.debug(f"Failed to load knowledge instructions: {ki_err}") - - # Prepend knowledge block BEFORE other instructions - # so the LLM sees it early and acts on it - effective_instructions = ( - f"{knowledge_block}\n\n{knowledge_instructions_text}\n\n{effective_instructions}" - if effective_instructions - else f"{knowledge_block}\n\n{knowledge_instructions_text}" - ) - 
logger.info(f"Knowledge awareness injected: {len(knowledge_block)} chars") - except Exception as e: - logger.debug(f"Knowledge awareness injection skipped: {e}") - if lc_bind_tools_meta is not None: - lc_bind_tools_meta["_lc_bind_tools_overlay_structured_tools"] = [ - t for t in (tools_for_prompt or []) if getattr(t, "name", None) - ] - - # Create prompt dynamically - dynamic_prompt = self._static_prompt - - if not dynamic_prompt: - dynamic_prompt = create_mcp_prompt( - tools_for_prompt, - allow_user_clarification=True, - return_to_user_cases=None, - instructions=effective_instructions, - apps=apps_for_prompt, - task_loaded_from_file=task_loaded_from_file, - is_autonomous_subtask=settings.advanced_features.force_autonomous_mode - or is_autonomous_subtask, - prompt_template=self._prompt_template, - enable_find_tools=enable_find_tools, - enable_todos=enable_todos, - special_instructions=special_instructions_final, - skills_enabled=skills_enabled, - skills_prompt_section=skills_prompt_section, - enable_shell_tool=getattr(settings.advanced_features, "enable_shell_tool", False), - has_knowledge=has_knowledge_tools, - few_shot_examples=few_shot_examples, - few_shots_enabled=few_shots_enabled, - ) - logger.info( - "Prepared CugaLite prompt: enable_find_tools={} few_shot_message_turns={} " - "few_shots_as_messages={} prompt_chars={}", - enable_find_tools, - len(few_shot_examples), - bool(few_shot_examples), - len(dynamic_prompt), - ) - else: - logger.info( - "Using static CugaLite prompt; dynamic few-shot injection skipped " - "(enable_find_tools={} few_shot_turns={})", - enable_find_tools, - len(few_shot_examples), - ) - - reflection_apps_snapshot = format_apps_for_prompt(apps_for_prompt or []) - - return Command( - goto="call_model", - update={ - "tools_prepared": True, - "prepared_prompt": dynamic_prompt, - "step_count": 0, - "cuga_lite_metadata": state.cuga_lite_metadata, - "reflection_apps": reflection_apps_snapshot, - "reflection_enable_find_tools": 
enable_find_tools, - "reflection_skills_enabled": skills_enabled, - "reflection_skills_prompt_section": skills_prompt_section, - "mcp_few_shot_messages": few_shot_examples, - }, - ) - - return prepare_tools_and_apps - - def build_sandbox_node(self, base_thread_id: Any, base_apps_list: Any): - """Return the sandbox async node.""" - - async def sandbox(state: Any, config: Optional[RunnableConfig] = None): - """Execute code in sandbox and return results.""" - from cuga.backend.cuga_graph.nodes.cuga_lite.tracking.tracker import ToolCallTracker - - # Check if user denied approval (only if policies are enabled) - if settings.policy.enabled: - denial_command = ToolApprovalHandler.handle_denial(self, state) - if denial_command: - return denial_command - - configurable = config.get("configurable", {}) if config else {} - max_steps = ( - configurable.get("cuga_lite_max_steps") if "cuga_lite_max_steps" in configurable else None - ) - if "thread_id" in configurable: - current_thread_id = configurable["thread_id"] - else: - current_thread_id = state.thread_id or base_thread_id - current_apps_list = configurable.get("apps_list", base_apps_list) - track_tool_calls = configurable.get("track_tool_calls", False) - reflection_enabled = ( - configurable.get("reflection_enabled") - if "reflection_enabled" in configurable - else settings.advanced_features.reflection_enabled - ) - - # Get existing variables using CugaLiteState's own variables_manager - existing_vars = {} - for var_name in list(state.variables_manager.get_variable_names()): - var_value = state.variables_manager.get_variable(var_name) - if is_find_tools_listing_markdown(var_value): - state.variables_manager.remove_variable(var_name) - continue - existing_vars[var_name] = var_value - - # Add tools to context - context = {**existing_vars, **self._tools_context} - - # Start tool call tracking (only if enabled via invoke parameter) - ToolCallTracker.start_tracking(enabled=track_tool_calls) - - try: - # Execute the script - 
pass the CugaLiteState itself since it has variables_manager - _exec_plan = ExecutionRouter.resolve(settings) - if _exec_plan.split_execution_active: - logger.info( - "Split execution: python=%s shell=%s fs=%s", - _exec_plan.python_backend, - _exec_plan.shell_backend, - _exec_plan.filesystem_backend, - ) - output, new_vars = await CodeExecutor.eval_with_tools_async( - code=state.script, - _locals=context, - state=state, # Pass CugaLiteState - it has variables_manager property - thread_id=current_thread_id, - apps_list=current_apps_list, - plan=_exec_plan, - ) - - self._tracker.collect_step(step=Step(name="User_output", data=output)) - self._tracker.collect_step( - step=Step( - name="User_output_variables", - data=json.dumps( - new_vars, - default=lambda o: o.model_dump() if hasattr(o, "model_dump") else str(o), - ), - ) - ) - - # Output is already formatted and trimmed by code_executor - logger.debug(f"\n\n------\n\n📝 Execution output:\n\n{output}\n\n------\n\n") - - # Update variables using CugaLiteState's variables_manager - # This automatically updates state.variables_storage - for name, value in new_vars.items(): - if is_find_tools_listing_markdown(value): - continue - state.variables_manager.add_variable( - value, name=name, description="Created during code execution" - ) - - reflection_output = "" - if reflection_enabled: - try: - active_model = configurable.get("llm") or _llm_manager.get_model( - settings.agent.planner.model - ) - reflection_agent = reflection_task(llm=active_model) - # Format chat messages as history string - agent_history_parts = [] - for msg in state.chat_messages: - if isinstance(msg, HumanMessage): - agent_history_parts.append(f"User: {msg.content}") - elif isinstance(msg, AIMessage): - agent_history_parts.append(f"Assistant: {msg.content}") - else: - agent_history_parts.append( - f"{type(msg).__name__}: {getattr(msg, 'content', str(msg))}" - ) - agent_history = ( - "\n".join(agent_history_parts) - if agent_history_parts - else "No 
previous conversation history" - ) - reflection_result = await reflection_agent.ainvoke( - { - "instructions": "", - "current_task": _reflection_current_task(state) or "(no task text)", - "agent_history": agent_history, - "coder_agent_output": output, - "apps": state.reflection_apps or [], - "enable_find_tools": state.reflection_enable_find_tools, - "skills_enabled": state.reflection_skills_enabled, - "skills_prompt_section": state.reflection_skills_prompt_section, - "force_autonomous_mode": settings.advanced_features.force_autonomous_mode, - } - ) - reflection_output = reflection_result.content - logger.debug(f"Reflection output:\n{reflection_output}") - except Exception as e: - logger.warning(f"Reflection failed: {e}") - reflection_output = "" - - # Output is already formatted by code_executor - execution_message_content = execution_output_text(output) - if reflection_output: - execution_message_content = ( - f"{execution_message_content}\n\n---\n\nSummary:\n{reflection_output}" - ) - - self._tracker.collect_step( - step=Step( - name="User_return", - data=execution_message_content, - ) - ) - - new_message = HumanMessage(content=execution_message_content) - updated_messages, error_message = _core_append_with_step_limit( - self, state, [new_message], max_steps - ) - - # Collect tool calls from this execution - execution_tool_calls = ToolCallTracker.stop_tracking() - accumulated_tool_calls = (state.tool_calls or []) + execution_tool_calls - - if error_message: - return _core_create_error_command( - self, - updated_messages, - error_message, - state.step_count, - additional_updates={ - "variables_storage": state.variables_storage, - "variable_counter_state": state.variable_counter_state, - "variable_creation_order": state.variable_creation_order, - "tool_calls": accumulated_tool_calls, - }, - ) - - todo_state_update = extract_task_todos_from_new_vars(new_vars) - base_update = { - "chat_messages": updated_messages, - "variables_storage": state.variables_storage, - 
"variable_counter_state": state.variable_counter_state, - "variable_creation_order": state.variable_creation_order, - "step_count": state.step_count + 1, - "tool_calls": accumulated_tool_calls, - } - if todo_state_update is not None: - base_update["task_todos"] = todo_state_update - return base_update - except Exception as e: - # Collect tool calls even on error - execution_tool_calls = ToolCallTracker.stop_tracking() - accumulated_tool_calls = (state.tool_calls or []) + execution_tool_calls - - error_msg = f"Error during execution: {str(e)}" - logger.error(error_msg) - new_message = HumanMessage(content=error_msg) - updated_messages, limit_error_message = _core_append_with_step_limit( - self, state, [new_message], max_steps - ) - - if limit_error_message: - return _core_create_error_command( - self, updated_messages, limit_error_message, state.step_count - ) - - return { - "chat_messages": updated_messages, - "error": error_msg, - "execution_complete": True, - "step_count": state.step_count + 1, - "tool_calls": accumulated_tool_calls, - } +from cuga.backend.cuga_graph.nodes.cuga_lite.adapter.graph_adapter import AgentGraphAdapter - return sandbox +__all__ = ["AgentGraphAdapter"] diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py index 511e86bb..4b23e759 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py @@ -1,7 +1,7 @@ """CugaLite LangGraph — thin wiring module. State class, loop adapter, and the ``create_cuga_lite_graph`` factory. -All node logic lives in ``AgentGraphAdapter``. +All node logic lives in ``cuga_lite.adapter`` (prepare/sandbox nodes + hook overrides). 
""" from pathlib import Path diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/bind_tools.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/bind_tools.py index a1c27792..fb3df792 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/bind_tools.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/bind_tools.py @@ -93,7 +93,7 @@ async def _indexed_provider_tools_first_wins( try: all_tools = await tool_provider.get_all_tools() except Exception as e: - logger.warning("bind_tools: get_all_tools failed: %s", e) + logger.warning("bind_tools: get_all_tools failed: {}", e) return {} by_name: Dict[str, StructuredTool] = {} duplicates: Set[str] = set() @@ -228,7 +228,7 @@ async def resolve_model_with_bind_tools( seen_names.add(name) bound.append(t) except Exception as e: - logger.warning("bind_tools apps_and_tools: get_tools(%s) failed: %s", app_name, e) + logger.warning("bind_tools apps_and_tools: get_tools({}) failed: {}", app_name, e) by_name_lookup: Dict[str, StructuredTool] = {} if tool_names: @@ -283,7 +283,7 @@ async def resolve_model_with_bind_tools( seen.add(name) bound.append(t) except Exception as e: - logger.warning("bind_tools apps: get_tools(%s) failed: %s", app_name, e) + logger.warning("bind_tools apps: get_tools({}) failed: {}", app_name, e) _merge_find_tools_into_bound( bound, seen, include_find_tools=include_find_tools, tools_context_ref=tools_context_ref ) @@ -336,5 +336,5 @@ async def resolve_model_with_bind_tools( mode, ) except Exception as e: - logger.warning("resolve_model_with_bind_tools failed: %s", e) + logger.warning("resolve_model_with_bind_tools failed: {}", e) return active_model diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/find_tools.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/find_tools.py index 50aa27b1..d74ef17d 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/find_tools.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/helpers/find_tools.py @@ 
-82,6 +82,8 @@ async def find_tools_func(query: str, app_name: str): """ if app_to_tools_map and app_name in app_to_tools_map: filtered_tools = app_to_tools_map[app_name] + elif app_to_tools_map is None: + filtered_tools = all_tools else: logger.warning( f"App '{app_name}' not found in app_to_tools_map. Available apps: {list(app_to_tools_map.keys()) if app_to_tools_map else 'N/A'}" diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/langchain.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/langchain.py index eda02826..d97b9fec 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/langchain.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/providers/langchain.py @@ -61,15 +61,14 @@ def _validate_tools(self): raise ValueError(f"Tool at index {i} is missing a name") if isinstance(tool, StructuredTool) and not hasattr(tool, "func"): - if not hasattr(tool, "coroutine") and not hasattr(tool, "_run"): - logger.warning( - f"StructuredTool '{tool.name}' is missing .func attribute. " - f"Adding it for CodeAct compatibility." - ) - if hasattr(tool, "coroutine") and tool.coroutine: - tool.func = tool.coroutine - elif hasattr(tool, "_run"): - tool.func = tool._run + logger.warning( + f"StructuredTool '{tool.name}' is missing .func attribute. " + f"Adding it for CodeAct compatibility." 
+ ) + if hasattr(tool, "coroutine") and tool.coroutine: + tool.func = tool.coroutine + elif hasattr(tool, "_run"): + tool.func = tool._run async def initialize(self): """Initialize the provider (validates tools).""" diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_agent_graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_agent_graph_adapter.py index ac4eecb2..5e394739 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_agent_graph_adapter.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_lite/tests/test_agent_graph_adapter.py @@ -240,7 +240,7 @@ async def test_classify_auto_continue_delegates_to_nl_classifier(): mock_model = MagicMock() with patch( - "cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter.classify_nl_auto_continue", + "cuga.backend.cuga_graph.nodes.cuga_lite.adapter.graph_adapter.classify_nl_auto_continue", new_callable=AsyncMock, return_value=True, ) as mock_classify: @@ -255,7 +255,7 @@ async def test_classify_auto_continue_returns_false_when_not_continuing(): state = SimpleNamespace() with patch( - "cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter.classify_nl_auto_continue", + "cuga.backend.cuga_graph.nodes.cuga_lite.adapter.graph_adapter.classify_nl_auto_continue", new_callable=AsyncMock, return_value=False, ): diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/delegation.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/delegation.py new file mode 100644 index 00000000..d2b1b2ad --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/delegation.py @@ -0,0 +1,113 @@ +"""Agent delegation helpers for supervisor conversational mode.""" + +from __future__ import annotations + +import inspect +from typing import Any, Callable, Dict, List, Optional + +from loguru import logger + +from cuga.config import settings + + +def resolve_names_from_caller_frame(variable_names: List[str]) -> Dict[str, Any]: + """Resolve names from the delegated code's caller 
frame. + + LocalExecutor injects supervisor context into ``_async_main``'s globals; only + using ``f_locals`` missed those bindings, so sub-agents received no variables + and tasks showed e.g. ``amount=None``. + """ + resolved: Dict[str, Any] = {} + frame = inspect.currentframe() + try: + caller = frame.f_back if frame is not None else None + if caller is None: + return resolved + for name in variable_names: + if name in caller.f_locals: + resolved[name] = caller.f_locals[name] + elif name in caller.f_globals: + resolved[name] = caller.f_globals[name] + finally: + del frame + return resolved + + +def create_agent_delegation_func( + adapter: Any, + agent_name: str, + agent_or_config: Any, + agent_card: Any = None, +) -> Callable: + from cuga.backend.cuga_graph.nodes.cuga_supervisor.a2a_protocol import ( + A2AProtocol, + HAS_A2A_SDK, + delegate_task_via_a2a_sdk, + ) + from cuga.sdk import CugaAgent + + pass_variables_a2a = getattr(settings.supervisor, "pass_variables_a2a", False) + + async def delegate_to_agent(task: str, variables: Optional[List[str]] = None) -> Any: + logger.info(f"Delegating to {agent_name}: {task[:100]}...") + + if isinstance(agent_or_config, CugaAgent): + vars_to_pass = {} + if variables is not None: + vars_to_pass = resolve_names_from_caller_frame(variables) + result = await agent_or_config.invoke( + task, + thread_id=f"supervisor_conversational_{agent_name}", + variables=vars_to_pass if vars_to_pass else None, + ) + if hasattr(result, "variables") and result.variables and adapter._shared_vm_ref[0] is not None: + from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import ( + VariableBridge, + ) + + bridged = VariableBridge.bridge( + result.variables, + adapter._shared_vm_ref[0], + description_prefix=f"from {agent_name}", + ) + if bridged: + logger.info("Bridged {} variable(s) from {}: {}", len(bridged), agent_name, bridged) + return result.answer if hasattr(result, "answer") else str(result) + + if
isinstance(agent_or_config, dict) and agent_or_config.get("type") == "external": + a2a_config = agent_or_config.get("config", {}).get("a2a_protocol", {}) + endpoint = a2a_config.get("endpoint") + transport = a2a_config.get("transport", "http") + + if agent_card is not None and HAS_A2A_SDK and transport == "http": + vars_to_pass = {} + if pass_variables_a2a and variables is not None: + vars_to_pass = resolve_names_from_caller_frame(variables) + result = await delegate_task_via_a2a_sdk( + agent_card, + task, + auth=a2a_config.get("auth"), + timeout=float(a2a_config.get("timeout", 30)), + variables=vars_to_pass if vars_to_pass else None, + ) + return result.get("result", "") + + a2a_protocol = A2AProtocol(endpoint=endpoint, transport=transport) + await a2a_protocol.connect() + try: + vars_to_pass = {} + if variables is not None: + vars_to_pass = resolve_names_from_caller_frame(variables) + result = await a2a_protocol.delegate_task( + target_agent=agent_name, + task=task, + context={"thread_id": None}, + variables=vars_to_pass, + ) + return result.get("result", "") + finally: + await a2a_protocol.disconnect() + + return f"Error: Unknown agent type for {agent_name}" + + return delegate_to_agent diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/nodes/execute_agent_tool.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/nodes/execute_agent_tool.py new file mode 100644 index 00000000..3aec11a0 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/nodes/execute_agent_tool.py @@ -0,0 +1,107 @@ +"""Execute node for the supervisor conversational graph.""" + +from __future__ import annotations + +from typing import Any, Callable, Optional + +from langchain_core.messages import HumanMessage +from langchain_core.runnables import RunnableConfig +from loguru import logger + +from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( + append_chat_messages_with_step_limit as core_append, + create_error_command as core_create_error, 
+ execution_output_text, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ExecutionRouter +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.tool_approval_handler import ToolApprovalHandler +from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor +from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_state import CugaSupervisorState +from cuga.config import settings + + +def create_execute_agent_tool_node(adapter: Any) -> Callable: + def append(state, new_msgs): + return core_append(adapter, state, new_msgs) + + def create_error(updated_messages, error_message, step_count, additional_updates=None): + return core_create_error(adapter, updated_messages, error_message, step_count, additional_updates) + + async def execute_agent_tool(state: CugaSupervisorState, config: Optional[RunnableConfig] = None): + logger.info("Supervisor conversational: executing agent delegation code") + + if settings.policy.enabled: + denial_command = ToolApprovalHandler.handle_denial(adapter, state) + if denial_command: + return denial_command + + existing_vars = {} + var_manager = adapter.get_variable_manager(state) + if var_manager is not None: + for var_name in var_manager.get_variable_names(): + existing_vars[var_name] = var_manager.get_variable(var_name) + adapter._shared_vm_ref[0] = var_manager + + context = {**existing_vars, **adapter._agent_tools_context} + + try: + exec_plan = ExecutionRouter.resolve(settings) + if exec_plan.split_execution_active: + logger.info( + "Supervisor split execution: python={} shell={} fs={}", + exec_plan.python_backend, + exec_plan.shell_backend, + exec_plan.filesystem_backend, + ) + output, new_vars = await CodeExecutor.eval_with_tools_async( + code=state.script, + _locals=context, + state=state, + thread_id=state.thread_id, + apps_list=None, + variable_manager=adapter.get_variable_manager(state), + plan=exec_plan, + ) + + logger.debug(f"Execution output:
{output.strip()[:500]}...") + + if var_manager is not None: + for name, value in new_vars.items(): + var_manager.add_variable( + value, name=name, description="Created during agent delegation execution" + ) + + execution_message_content = execution_output_text(output) + new_message = HumanMessage(content=execution_message_content) + updated_messages, error_message = append(state, [new_message]) + + if error_message: + return create_error( + updated_messages, + error_message, + state.step_count, + additional_updates={"supervisor_variables": state.supervisor_variables}, + ) + + return { + "supervisor_chat_messages": updated_messages, + "supervisor_variables": state.supervisor_variables, + "step_count": state.step_count + 1, + } + except Exception as exc: + error_msg = f"Error during execution: {str(exc)}" + logger.exception(error_msg) + new_message = HumanMessage(content=error_msg) + updated_messages, limit_error_message = append(state, [new_message]) + + if limit_error_message: + return create_error(updated_messages, limit_error_message, state.step_count) + + return { + "supervisor_chat_messages": updated_messages, + "error": error_msg, + "execution_complete": True, + "step_count": state.step_count + 1, + } + + return execute_agent_tool diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/nodes/prepare_agents_and_prompt.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/nodes/prepare_agents_and_prompt.py new file mode 100644 index 00000000..68bf4827 --- /dev/null +++ b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/nodes/prepare_agents_and_prompt.py @@ -0,0 +1,249 @@ +"""Prepare node for the supervisor conversational graph.""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import Any, Callable, Optional + +from langchain_core.runnables import RunnableConfig +from langgraph.types import Command +from loguru import logger + +from
cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import make_tool_awaitable +from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.todos import create_update_todos_tool +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ( + ExecutionRouter, + split_execution_note, +) +from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.tool_approval_handler import ToolApprovalHandler +from cuga.backend.cuga_graph.nodes.cuga_agent_core.tools.runtime_tools import ( + build_runtime_tools, + prompt_tool_dicts, + resolve_runtime_backends, +) +from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_state import AgentInfo, CugaSupervisorState +from cuga.backend.cuga_graph.nodes.cuga_supervisor.delegation import create_agent_delegation_func +from cuga.config import settings +from cuga.configurations.instructions_manager import get_all_instructions_formatted + + +def create_prepare_agents_and_prompt_node(adapter: Any) -> Callable: + prompt_path = Path(__file__).resolve().parent.parent / "prompts" / "supervisor_lite_prompt.jinja2" + with open(prompt_path, "r", encoding="utf-8") as prompt_file: + prompt_template_str = prompt_file.read() + instructions = get_all_instructions_formatted() + + async def prepare_agents_and_prompt( + state: CugaSupervisorState, config: Optional[RunnableConfig] = None + ) -> Command: + logger.info("Preparing agents and prompt for supervisor conversational mode") + + if settings.policy.enabled and not ToolApprovalHandler.should_skip_policy_check(adapter, state): + from cuga.backend.cuga_graph.policy.enactment import PolicyEnactment + from cuga.backend.cuga_graph.policy.models import PolicyType + + policy_command, policy_metadata = await PolicyEnactment.check_and_enact( + state, + config, + policy_types=[ + PolicyType.INTENT_GUARD, + PolicyType.PLAYBOOK, + PolicyType.TOOL_GUIDE, + ], + adapter=adapter, + ) + if policy_command: + return policy_command + if policy_metadata: + 
adapter.set_metadata(state, policy_metadata) + + from cuga.backend.cuga_graph.nodes.cuga_supervisor.a2a_protocol import ( + HAS_A2A_SDK, + _agent_card_description, + fetch_agent_card, + format_agent_card_for_prompt, + ) + from cuga.sdk import CugaAgent + + agent_list = [] + agent_tools_for_prompt = [] + pass_variables_a2a = getattr(settings.supervisor, "pass_variables_a2a", False) + + for agent_name, agent_or_config in adapter._agents.items(): + agent_card = None + if isinstance(agent_or_config, CugaAgent): + agent_type = "internal" + description = getattr(agent_or_config, "description", f"Internal agent: {agent_name}") + elif isinstance(agent_or_config, dict): + agent_type = agent_or_config.get("type", "external") + a2a_cfg = agent_or_config.get("config", {}).get("a2a_protocol", {}) + if agent_type == "external" and HAS_A2A_SDK and a2a_cfg.get("transport") == "http": + endpoint = a2a_cfg.get("endpoint") + if endpoint: + try: + agent_card = await fetch_agent_card( + endpoint, + auth=a2a_cfg.get("auth"), + timeout=float(a2a_cfg.get("timeout", 30)), + ) + description = _agent_card_description(agent_card) + except Exception as e: + logger.warning(f"Failed to fetch A2A agent card for {agent_name}: {e}") + description = agent_or_config.get("description", f"External agent: {agent_name}") + else: + description = agent_or_config.get("description", f"External agent: {agent_name}") + else: + description = agent_or_config.get("description", f"{agent_type} agent: {agent_name}") + else: + agent_type = "unknown" + description = f"Agent: {agent_name}" + + agent_entry = {"name": agent_name, "type": agent_type, "description": description} + if agent_card is not None: + agent_entry["agent_card"] = format_agent_card_for_prompt(agent_card) + agent_list.append(agent_entry) + + tool_name = f"delegate_to_{agent_name}" + tool_func = create_agent_delegation_func( + adapter, agent_name, agent_or_config, agent_card=agent_card + ) + adapter._agent_tools_context[tool_name] = tool_func + + 
is_a2a_agent = agent_card is not None + if is_a2a_agent and pass_variables_a2a: + tool_info = { + "name": tool_name, + "description": ( + f"Delegate a task to the {agent_name} agent. {description} " + "Variables are passed in request metadata." + ), + "params_str": "task: str, variables: Optional[List[str]] = None", + "params_doc": ( + f"- task (str): The task description to send to {agent_name}\n" + f"- variables (Optional[List[str]]): Variable names to pass in A2A metadata" + ), + "response_doc": f"Returns the result from {agent_name}.", + } + elif is_a2a_agent: + tool_info = { + "name": tool_name, + "description": f"Delegate a task to {agent_name}. {description}", + "params_str": "task: str", + "params_doc": f"- task (str): The task description to send to {agent_name}.", + "response_doc": f"Returns the result from {agent_name}.", + } + else: + tool_info = { + "name": tool_name, + "description": ( + f"Delegate a task to the {agent_name} agent. This agent specializes in: {description}" + ), + "params_str": "task: str, variables: Optional[List[str]] = None", + "params_doc": ( + f"- task (str): The task description to delegate to {agent_name}\n" + f"- variables (Optional[List[str]]): List of variable names to pass" + ), + "response_doc": f"Returns the result from {agent_name} agent execution.", + } + agent_tools_for_prompt.append(tool_info) + + todos_tool = await create_update_todos_tool() + adapter._agent_tools_context["create_update_todos"] = make_tool_awaitable(todos_tool.func) + agent_tools_for_prompt.append( + { + "name": "create_update_todos", + "description": todos_tool.description, + "params_str": "todos: List[Dict[str, str]]", + "params_doc": ( + "todos: List of todo items, each with 'text' and 'status' ('pending' or 'completed')" + ), + "response_doc": "Returns the current list of todos with their status.", + } + ) + + cfg = config.get("configurable", {}) if config else {} + runtime_thread_id = cfg.get("thread_id") or state.thread_id + runtime_backends = 
resolve_runtime_backends(settings, cfg) + runtime_bundle = build_runtime_tools(thread_id=runtime_thread_id, backends=runtime_backends) + adapter._agent_tools_context.update(runtime_bundle.execution_callables) + agent_tools_for_prompt.extend(prompt_tool_dicts(runtime_bundle.prompt_tools)) + + skills_section = "" + if getattr(settings.skills, "enabled", False): + from cuga.backend.skills import ( + SkillRegistry, + create_skill_tools, + discover_skills, + format_available_skills_block, + ) + + cuga_folder = os.getenv("CUGA_FOLDER", settings.policy.cuga_folder) + skill_entries = discover_skills(cuga_folder) + if skill_entries: + skill_registry = SkillRegistry(skill_entries) + skill_tools = create_skill_tools(skill_registry) + for skill_tool in skill_tools: + tool_func = ( + skill_tool.coroutine + if getattr(skill_tool, "coroutine", None) + else getattr(skill_tool, "func", None) + ) + if tool_func: + adapter._agent_tools_context[skill_tool.name] = make_tool_awaitable(tool_func) + agent_tools_for_prompt.extend(prompt_tool_dicts(skill_tools)) + skills_section = format_available_skills_block(skill_registry) + logger.info(f"Supervisor: loaded {len(skill_entries)} skill(s)") + + if adapter._tool_provider is not None: + try: + provider_tools = await adapter._tool_provider.get_all_tools() + for provider_tool in provider_tools: + provider_func = ( + provider_tool.coroutine + if getattr(provider_tool, "coroutine", None) + else getattr(provider_tool, "func", None) + ) + if provider_func: + adapter._agent_tools_context[provider_tool.name] = make_tool_awaitable(provider_func) + agent_tools_for_prompt.extend(prompt_tool_dicts(provider_tools)) + logger.info(f"Supervisor: loaded {len(provider_tools)} tool(s) from tool_provider") + except Exception as exc: + logger.warning(f"Supervisor: failed to load tools from tool_provider: {exc}") + + split_note = split_execution_note(ExecutionRouter.resolve(settings)) + effective_special_instructions = ( + "\n\n".join(filter(None, 
[adapter._special_instructions, skills_section, split_note])) or None + ) + + is_autonomous_subtask = state.sub_task is not None and state.sub_task.strip() != "" + + from jinja2 import Template + + template = Template(prompt_template_str) + dynamic_prompt = template.render( + base_prompt=None, + agents=agent_list, + tools=agent_tools_for_prompt, + is_autonomous_subtask=is_autonomous_subtask, + instructions=instructions, + enable_todos=True, + special_instructions=effective_special_instructions, + ) + + return Command( + goto="call_model", + update={ + "tools_prepared": True, + "prepared_prompt": dynamic_prompt, + "step_count": 0, + "available_agents": { + name: AgentInfo( + name=name, type=info["type"], description=info["description"] + ).model_dump() + for name, info in zip([a["name"] for a in agent_list], agent_list) + }, + }, + ) + + return prepare_agents_and_prompt diff --git a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py index 2207756a..cb5ee29d 100644 --- a/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py +++ b/src/cuga/backend/cuga_graph/nodes/cuga_supervisor/supervisor_graph_adapter.py @@ -1,72 +1,32 @@ """SupervisorGraphAdapter — CoreGraphAdapter implementation for CugaSupervisor. 
-Provides all hook overrides that the shared ``create_call_model_node`` factory -delegates to for Supervisor-specific behaviour: - -- messages_key, metadata_key, execute_node_name, sender_name attributes -- get_messages: reads state.supervisor_chat_messages -- resolve_max_steps: state.cuga_lite_max_steps → settings default -- get_variable_manager: state.supervisor_variables_manager (Phase-9 coupling fix) -- get_variables_storage: state.supervisor_variables -- build_prepare_node(): returns the prepare_agents_and_prompt async node -- build_execute_node(): returns the execute_agent_tool async node - -``_resolve_names_from_caller_frame`` is a module-level helper moved here from -``cuga_supervisor_graph.py`` so delegation functions can resolve variable names -from the delegating code's caller frame. +The adapter defines how the shared agent graph uses supervisor state: message and +metadata keys, variable manager seams, and node factory wiring. Prompt, tool, and +execution logic live in ``cuga_supervisor/nodes/`` and ``delegation.py``. 
""" from __future__ import annotations -import inspect -import os -from pathlib import Path from typing import Any, Callable, Dict, List, Optional -from langchain_core.messages import BaseMessage, HumanMessage -from loguru import logger +from langchain_core.messages import BaseMessage from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import CoreGraphAdapter +from cuga.backend.cuga_graph.nodes.cuga_supervisor.delegation import resolve_names_from_caller_frame +from cuga.backend.cuga_graph.nodes.cuga_supervisor.nodes.execute_agent_tool import ( + create_execute_agent_tool_node, +) +from cuga.backend.cuga_graph.nodes.cuga_supervisor.nodes.prepare_agents_and_prompt import ( + create_prepare_agents_and_prompt_node, +) from cuga.config import settings - -# ── Module-level helper (moved from cuga_supervisor_graph.py) ────────────── - - -def _resolve_names_from_caller_frame(variable_names: List[str]) -> Dict[str, Any]: - """Resolve names from the delegated code's caller frame. - - LocalExecutor injects supervisor context into ``_async_main``'s globals; only - using ``f_locals`` missed those bindings, so sub-agents received no variables - and tasks showed e.g. ``amount=None``. - """ - resolved: Dict[str, Any] = {} - frame = inspect.currentframe() - try: - caller = frame.f_back if frame is not None else None - if caller is None: - return resolved - for name in variable_names: - if name in caller.f_locals: - resolved[name] = caller.f_locals[name] - elif name in caller.f_globals: - resolved[name] = caller.f_globals[name] - finally: - del frame - return resolved - - -# ── SupervisorGraphAdapter ───────────────────────────────────────────────── +# Backward-compatible alias for tests and callers that imported the private helper. +_resolve_names_from_caller_frame = resolve_names_from_caller_frame class SupervisorGraphAdapter(CoreGraphAdapter): - """CoreGraphAdapter implementation for the CugaSupervisor multi-agent graph. 
- - Overrides the hook methods that differ from the no-op defaults and provides - ``build_prepare_node`` / ``build_execute_node`` factories that produce the - graph nodes parameterised by the agents and tool configuration captured at - construction time. - """ + """CoreGraphAdapter implementation for the CugaSupervisor multi-agent graph.""" messages_key: str = "supervisor_chat_messages" execute_node_name: str = "execute_agent_tool" @@ -83,12 +43,9 @@ def __init__( self._agents = agents self._special_instructions = special_instructions self._tool_provider = tool_provider - # Mutable state shared between the prepare and execute node closures self._agent_tools_context: Dict[str, Any] = {} self._shared_vm_ref: List[Any] = [None] - # ── Abstract method implementations ─────────────────────────────────── - def get_messages(self, state: Any) -> List[BaseMessage]: return state.supervisor_chat_messages or [] @@ -101,445 +58,14 @@ def resolve_max_steps(self, state: Any, override: Optional[int]) -> int: else getattr(settings.advanced_features, "cuga_lite_max_steps", 50) ) - # ── Hook overrides ───────────────────────────────────────────────────── - def get_variable_manager(self, state: Any) -> Any: return getattr(state, "supervisor_variables_manager", None) def get_variables_storage(self, state: Any) -> Optional[Any]: return getattr(state, "supervisor_variables", None) - # ── Node factories ───────────────────────────────────────────────────── - def build_prepare_node(self) -> Callable: - """Return the ``prepare_agents_and_prompt`` async node function.""" - from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_state import ( - AgentInfo, - CugaSupervisorState, - ) - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler - from cuga.backend.cuga_graph.nodes.cuga_agent_core.tools.runtime_tools import ( - build_runtime_tools, - prompt_tool_dicts, - resolve_runtime_backends, - ) - from 
cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ( - ExecutionRouter, - split_execution_note, - ) - from cuga.configurations.instructions_manager import get_all_instructions_formatted - from langchain_core.runnables import RunnableConfig - from langgraph.types import Command - - adapter = self - - prompt_filename = "supervisor_lite_prompt.jinja2" - prompt_path = Path(__file__).parent / "prompts" / prompt_filename - with open(prompt_path, "r", encoding="utf-8") as _f: - _prompt_template_str = _f.read() - _instructions = get_all_instructions_formatted() - - def _create_agent_delegation_func( - agent_name: str, - agent_or_config: Any, - agent_card: Any = None, - ) -> Callable: - from cuga.backend.cuga_graph.nodes.cuga_supervisor.a2a_protocol import ( - A2AProtocol, - HAS_A2A_SDK, - delegate_task_via_a2a_sdk, - ) - from cuga.sdk import CugaAgent - - pass_variables_a2a = getattr(settings.supervisor, "pass_variables_a2a", False) - - async def delegate_to_agent(task: str, variables: Optional[List[str]] = None) -> Any: - logger.info(f"Delegating to {agent_name}: {task[:100]}...") - - if isinstance(agent_or_config, CugaAgent): - vars_to_pass = {} - if variables is not None: - vars_to_pass = _resolve_names_from_caller_frame(variables) - result = await agent_or_config.invoke( - task, - thread_id=f"supervisor_conversational_{agent_name}", - variables=vars_to_pass if vars_to_pass else None, - ) - if ( - hasattr(result, "variables") - and result.variables - and adapter._shared_vm_ref[0] is not None - ): - from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.variable_bridge import ( - VariableBridge, - ) - - bridged = VariableBridge.bridge( - result.variables, - adapter._shared_vm_ref[0], - description_prefix=f"from {agent_name}", - ) - if bridged: - logger.info( - "Bridged %d variable(s) from %s: %s", len(bridged), agent_name, bridged - ) - return result.answer if hasattr(result, "answer") else str(result) - - if isinstance(agent_or_config, 
dict) and agent_or_config.get("type") == "external": - a2a_config = agent_or_config.get("config", {}).get("a2a_protocol", {}) - endpoint = a2a_config.get("endpoint") - transport = a2a_config.get("transport", "http") - - if agent_card is not None and HAS_A2A_SDK and transport == "http": - vars_to_pass = {} - if pass_variables_a2a and variables is not None: - vars_to_pass = _resolve_names_from_caller_frame(variables) - result = await delegate_task_via_a2a_sdk( - agent_card, - task, - auth=a2a_config.get("auth"), - timeout=float(a2a_config.get("timeout", 30)), - variables=vars_to_pass if vars_to_pass else None, - ) - return result.get("result", "") - else: - a2a_protocol = A2AProtocol(endpoint=endpoint, transport=transport) - await a2a_protocol.connect() - try: - vars_to_pass = {} - if variables is not None: - vars_to_pass = _resolve_names_from_caller_frame(variables) - result = await a2a_protocol.delegate_task( - target_agent=agent_name, - task=task, - context={"thread_id": None}, - variables=vars_to_pass, - ) - return result.get("result", "") - finally: - await a2a_protocol.disconnect() - - return f"Error: Unknown agent type for {agent_name}" - - return delegate_to_agent - - async def prepare_agents_and_prompt( - state: CugaSupervisorState, config: Optional[RunnableConfig] = None - ) -> Command: - logger.info("Preparing agents and prompt for supervisor conversational mode") - - if settings.policy.enabled and not ToolApprovalHandler.should_skip_policy_check(adapter, state): - from cuga.backend.cuga_graph.policy.enactment import PolicyEnactment - from cuga.backend.cuga_graph.policy.models import PolicyType - - policy_command, policy_metadata = await PolicyEnactment.check_and_enact( - state, - config, - policy_types=[ - PolicyType.INTENT_GUARD, - PolicyType.PLAYBOOK, - PolicyType.TOOL_GUIDE, - ], - adapter=adapter, - ) - if policy_command: - return policy_command - if policy_metadata: - adapter.set_metadata(state, policy_metadata) - - from 
cuga.backend.cuga_graph.nodes.cuga_supervisor.a2a_protocol import ( - HAS_A2A_SDK, - _agent_card_description, - fetch_agent_card, - format_agent_card_for_prompt, - ) - from cuga.sdk import CugaAgent - - agent_list = [] - agent_tools_for_prompt = [] - pass_variables_a2a = getattr(settings.supervisor, "pass_variables_a2a", False) - - for agent_name, agent_or_config in adapter._agents.items(): - agent_card = None - if isinstance(agent_or_config, CugaAgent): - agent_type = "internal" - description = getattr(agent_or_config, "description", f"Internal agent: {agent_name}") - elif isinstance(agent_or_config, dict): - agent_type = agent_or_config.get("type", "external") - a2a_cfg = agent_or_config.get("config", {}).get("a2a_protocol", {}) - if agent_type == "external" and HAS_A2A_SDK and a2a_cfg.get("transport") == "http": - endpoint = a2a_cfg.get("endpoint") - if endpoint: - try: - agent_card = await fetch_agent_card( - endpoint, - auth=a2a_cfg.get("auth"), - timeout=float(a2a_cfg.get("timeout", 30)), - ) - description = _agent_card_description(agent_card) - except Exception as e: - logger.warning(f"Failed to fetch A2A agent card for {agent_name}: {e}") - description = agent_or_config.get( - "description", f"External agent: {agent_name}" - ) - else: - description = agent_or_config.get("description", f"External agent: {agent_name}") - else: - description = agent_or_config.get("description", f"{agent_type} agent: {agent_name}") - else: - agent_type = "unknown" - description = f"Agent: {agent_name}" - - agent_entry = {"name": agent_name, "type": agent_type, "description": description} - if agent_card is not None: - agent_entry["agent_card"] = format_agent_card_for_prompt(agent_card) - agent_list.append(agent_entry) - - tool_name = f"delegate_to_{agent_name}" - tool_func = _create_agent_delegation_func(agent_name, agent_or_config, agent_card=agent_card) - adapter._agent_tools_context[tool_name] = tool_func - - is_a2a_agent = agent_card is not None - if is_a2a_agent and 
pass_variables_a2a: - tool_info = { - "name": tool_name, - "description": ( - f"Delegate a task to the {agent_name} agent. {description} " - "Variables are passed in request metadata." - ), - "params_str": "task: str, variables: Optional[List[str]] = None", - "params_doc": ( - f"- task (str): The task description to send to {agent_name}\n" - f"- variables (Optional[List[str]]): Variable names to pass in A2A metadata" - ), - "response_doc": f"Returns the result from {agent_name}.", - } - elif is_a2a_agent: - tool_info = { - "name": tool_name, - "description": f"Delegate a task to {agent_name}. {description}", - "params_str": "task: str", - "params_doc": f"- task (str): The task description to send to {agent_name}.", - "response_doc": f"Returns the result from {agent_name}.", - } - else: - tool_info = { - "name": tool_name, - "description": ( - f"Delegate a task to the {agent_name} agent. " - f"This agent specializes in: {description}" - ), - "params_str": "task: str, variables: Optional[List[str]] = None", - "params_doc": ( - f"- task (str): The task description to delegate to {agent_name}\n" - f"- variables (Optional[List[str]]): List of variable names to pass" - ), - "response_doc": f"Returns the result from {agent_name} agent execution.", - } - agent_tools_for_prompt.append(tool_info) - - from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.todos import create_update_todos_tool - from cuga.backend.cuga_graph.nodes.cuga_agent_core.execution.code_extraction import ( - make_tool_awaitable, - ) - - todos_tool = await create_update_todos_tool() - adapter._agent_tools_context["create_update_todos"] = make_tool_awaitable(todos_tool.func) - agent_tools_for_prompt.append( - { - "name": "create_update_todos", - "description": todos_tool.description, - "params_str": "todos: List[Dict[str, str]]", - "params_doc": ( - "todos: List of todo items, each with 'text' and 'status' ('pending' or 'completed')" - ), - "response_doc": "Returns the current list of todos with 
their status.", - } - ) - - _cfg = config.get("configurable", {}) if config else {} - _runtime_thread_id = _cfg.get("thread_id") or state.thread_id - _runtime_backends = resolve_runtime_backends(settings, _cfg) - _runtime_bundle = build_runtime_tools(thread_id=_runtime_thread_id, backends=_runtime_backends) - adapter._agent_tools_context.update(_runtime_bundle.execution_callables) - agent_tools_for_prompt.extend(prompt_tool_dicts(_runtime_bundle.prompt_tools)) - - _skills_section = "" - if getattr(settings.skills, "enabled", False): - from cuga.backend.skills import ( - SkillRegistry, - create_skill_tools, - discover_skills, - format_available_skills_block, - ) - - _cuga_folder = os.getenv("CUGA_FOLDER", settings.policy.cuga_folder) - _skill_entries = discover_skills(_cuga_folder) - if _skill_entries: - _skill_registry = SkillRegistry(_skill_entries) - _skill_tools = create_skill_tools(_skill_registry) - for _st in _skill_tools: - _tool_func = ( - _st.coroutine if getattr(_st, "coroutine", None) else getattr(_st, "func", None) - ) - if _tool_func: - adapter._agent_tools_context[_st.name] = make_tool_awaitable(_tool_func) - agent_tools_for_prompt.extend(prompt_tool_dicts(_skill_tools)) - _skills_section = format_available_skills_block(_skill_registry) - logger.info(f"Supervisor: loaded {len(_skill_entries)} skill(s)") - - if adapter._tool_provider is not None: - try: - _provider_tools = await adapter._tool_provider.get_all_tools() - for _pt in _provider_tools: - _pt_func = ( - _pt.coroutine if getattr(_pt, "coroutine", None) else getattr(_pt, "func", None) - ) - if _pt_func: - adapter._agent_tools_context[_pt.name] = make_tool_awaitable(_pt_func) - agent_tools_for_prompt.extend(prompt_tool_dicts(_provider_tools)) - logger.info(f"Supervisor: loaded {len(_provider_tools)} tool(s) from tool_provider") - except Exception as _e: - logger.warning(f"Supervisor: failed to load tools from tool_provider: {_e}") - - _split_note = 
split_execution_note(ExecutionRouter.resolve(settings)) - _effective_special_instructions = ( - "\n\n".join(filter(None, [adapter._special_instructions, _skills_section, _split_note])) - or None - ) - - is_autonomous_subtask = state.sub_task is not None and state.sub_task.strip() != "" - - from jinja2 import Template - - template = Template(_prompt_template_str) - dynamic_prompt = template.render( - base_prompt=None, - agents=agent_list, - tools=agent_tools_for_prompt, - is_autonomous_subtask=is_autonomous_subtask, - instructions=_instructions, - enable_todos=True, - special_instructions=_effective_special_instructions, - ) - - return Command( - goto="call_model", - update={ - "tools_prepared": True, - "prepared_prompt": dynamic_prompt, - "step_count": 0, - "available_agents": { - name: AgentInfo( - name=name, type=info["type"], description=info["description"] - ).model_dump() - for name, info in zip([a["name"] for a in agent_list], agent_list) - }, - }, - ) - - return prepare_agents_and_prompt + return create_prepare_agents_and_prompt_node(self) def build_execute_node(self) -> Callable: - """Return the ``execute_agent_tool`` async node function.""" - from cuga.backend.cuga_graph.nodes.cuga_supervisor.cuga_supervisor_state import ( - CugaSupervisorState, - ) - from cuga.backend.cuga_graph.nodes.cuga_lite.tool_approval_handler import ToolApprovalHandler - from cuga.backend.cuga_graph.nodes.cuga_lite.executors import CodeExecutor - from cuga.backend.cuga_graph.nodes.cuga_agent_core.graph.graph_nodes import ( - append_chat_messages_with_step_limit as _core_append, - create_error_command as _core_create_error, - execution_output_text, - ) - from cuga.backend.cuga_graph.nodes.cuga_agent_core.policy.execution_policy import ExecutionRouter - from langchain_core.runnables import RunnableConfig - - adapter = self - - def _append(state, new_msgs): - return _core_append(adapter, state, new_msgs) - - def _create_error(updated_messages, error_message, step_count, 
additional_updates=None): - return _core_create_error( - adapter, updated_messages, error_message, step_count, additional_updates - ) - - async def execute_agent_tool(state: CugaSupervisorState, config: Optional[RunnableConfig] = None): - logger.info("Supervisor conversational: executing agent delegation code") - - if settings.policy.enabled: - denial_command = ToolApprovalHandler.handle_denial(adapter, state) - if denial_command: - return denial_command - - existing_vars = {} - var_manager = adapter.get_variable_manager(state) - if var_manager is not None: - for var_name in var_manager.get_variable_names(): - existing_vars[var_name] = var_manager.get_variable(var_name) - adapter._shared_vm_ref[0] = var_manager - - context = {**existing_vars, **adapter._agent_tools_context} - - try: - _exec_plan = ExecutionRouter.resolve(settings) - if _exec_plan.split_execution_active: - logger.info( - "Supervisor split execution: python=%s shell=%s fs=%s", - _exec_plan.python_backend, - _exec_plan.shell_backend, - _exec_plan.filesystem_backend, - ) - output, new_vars = await CodeExecutor.eval_with_tools_async( - code=state.script, - _locals=context, - state=state, - thread_id=state.thread_id, - apps_list=None, - variable_manager=adapter.get_variable_manager(state), - plan=_exec_plan, - ) - - logger.debug(f"Execution output: {output.strip()[:500]}...") - - if var_manager is not None: - for name, value in new_vars.items(): - var_manager.add_variable( - value, name=name, description="Created during agent delegation execution" - ) - - execution_message_content = execution_output_text(output) - new_message = HumanMessage(content=execution_message_content) - updated_messages, error_message = _append(state, [new_message]) - - if error_message: - return _create_error( - updated_messages, - error_message, - state.step_count, - additional_updates={"supervisor_variables": state.supervisor_variables}, - ) - - return { - "supervisor_chat_messages": updated_messages, - "supervisor_variables": 
state.supervisor_variables, - "step_count": state.step_count + 1, - } - except Exception as e: - error_msg = f"Error during execution: {str(e)}" - logger.error(error_msg, exc_info=True) - new_message = HumanMessage(content=error_msg) - updated_messages, limit_error_message = _append(state, [new_message]) - - if limit_error_message: - return _create_error(updated_messages, limit_error_message, state.step_count) - - return { - "supervisor_chat_messages": updated_messages, - "error": error_msg, - "execution_complete": True, - "step_count": state.step_count + 1, - } - - return execute_agent_tool + return create_execute_agent_tool_node(self) diff --git a/tests/unit/test_cuga_lite_knowledge_scopes.py b/tests/unit/test_cuga_lite_knowledge_scopes.py index 62b45df0..f77bf8c1 100644 --- a/tests/unit/test_cuga_lite_knowledge_scopes.py +++ b/tests/unit/test_cuga_lite_knowledge_scopes.py @@ -2,9 +2,7 @@ from types import SimpleNamespace -from cuga.backend.cuga_graph.nodes.cuga_lite.agent_graph_adapter import ( - _get_knowledge_tool_scope_context, -) +from cuga.backend.cuga_graph.nodes.cuga_lite.helpers.knowledge import _get_knowledge_tool_scope_context from cuga.backend.cuga_graph.nodes.cuga_lite.executors.e2b.e2b_executor import E2BExecutor