Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions context/amplifier-dev/testing-patterns.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,17 +154,30 @@ The highest-confidence validation — tests the actual built artifact in a clean
```bash
# In amplifier-core:
./scripts/e2e-smoke-test.sh

# Cross-repo smoke test (validates local changes to CLI, foundation, modules, etc.):
./scripts/e2e-smoke-test.sh \
--local-source ../amplifier-app-cli \
--local-source ../amplifier-foundation \
--local-source ../amplifier-bundle-modes/modules/hooks-mode \
--local-source ../amplifier-bundle-modes/modules/tool-mode \
--local-source ../amplifier-module-hooks-approval
```

**Note:** For bundles with modules in subdirectories, point `--local-source` at the module path (e.g., `../amplifier-bundle-modes/modules/hooks-mode`), not the bundle root. The module subdirectory must have its own `pyproject.toml`.

### What It Does

1. Builds a wheel from local source (`maturin build`)
2. Creates a fresh Docker container (`python:3.12-slim`)
3. Installs `amplifier` from git (CLI + foundation from GitHub)
4. Overrides `amplifier-core` with the local wheel
5. Overrides additional packages with `--local-source` repos (if any)
6. Runs a real session: `amplifier run "Ask recipe author to run one of its example recipes"`
7. Detects crashes, tool failures, and timeouts
8. Reports PASS/FAIL

The `--local-source` flag can be specified multiple times. Each path is copied into the container and installed with `pip install --force-reinstall --no-deps`, following the same override pattern used for the core wheel. This enables testing cross-repo changes before pushing.

### When Required

Expand Down
67 changes: 60 additions & 7 deletions modules/tool-delegate/amplifier_module_tool_delegate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,9 +726,7 @@ async def execute(self, input: dict) -> ToolResult:
# provider_preferences wins when both are provided (explicit pin overrides matrix)
raw_model_role = input.get("model_role", "").strip()
if raw_model_role and provider_preferences is None:
routing_state = getattr(self.coordinator, "session_state", {}).get(
"routing_matrix"
)
routing_state = self.coordinator.get_capability("session.routing_matrix")
if routing_state:
try:
from amplifier_module_hooks_routing.resolver import (
Expand Down Expand Up @@ -840,6 +838,58 @@ async def execute(self, input: dict) -> ToolResult:
ProviderPreference.from_dict(p) for p in agent_default_prefs
]

return await self._spawn_new_session(
agent_name=agent_name,
instruction=instruction,
context_depth=context_depth,
context_scope=context_scope,
context_turns=context_turns,
provider_preferences=provider_preferences,
hooks=hooks,
agent_configs=agents,
tool_call_id=tool_call_id,
parallel_group_id=parallel_group_id,
)

async def _spawn_new_session(
self,
agent_name: str,
instruction: str,
context_depth: str,
context_scope: str,
context_turns: int,
provider_preferences: Any,
hooks: Any,
*,
agent_configs: dict[str, Any] | None = None,
tool_call_id: str = "",
parallel_group_id: str | None = None,
) -> ToolResult:
"""Spawn a new agent sub-session.

Args:
agent_name: Agent to delegate to
instruction: Task instruction for the agent
context_depth: HOW MUCH context - "none", "recent", or "all"
context_scope: WHICH content - "conversation", "agents", or "full"
context_turns: Number of turns for "recent" mode
provider_preferences: Optional provider/model preferences
hooks: Hook coordinator for event emission (or None)
agent_configs: Agent registry dict (defaults to coordinator.config["agents"])
tool_call_id: Orchestrator tool call ID (enriches event payloads)
parallel_group_id: Parallel group ID (enriches event payloads)

Returns:
ToolResult with success status and output or error
"""
# Resolve agent configs — accept passed-in dict or fetch from coordinator.
# Use isinstance guard to handle MagicMock coordinators in tests gracefully.
if agent_configs is None:
raw_agents = self.coordinator.config.get("agents", {})
agents: dict[str, Any] = raw_agents if isinstance(raw_agents, dict) else {}
else:
agents = agent_configs

# Get parent session ID
parent_session_id = self.coordinator.session_id

Expand Down Expand Up @@ -906,14 +956,17 @@ async def execute(self, input: dict) -> ToolResult:
)
effective_instruction = f"{context_text}\n\n[YOUR TASK]\n{instruction}"

# Extract orchestrator config from parent session for inheritance
# Extract orchestrator config from parent session for inheritance.
# The orchestrator field may be a string (name only) or a dict
# (with optional "config" sub-key). Handle both gracefully.
orchestrator_config = None
parent_config = parent_session.config or {}
session_config = parent_config.get("session", {})
orch_section = session_config.get("orchestrator", {})
if orch_config := orch_section.get("config"):
orchestrator_config = orch_config
logger.debug(f"Inheriting orchestrator config: {orchestrator_config}")
if isinstance(orch_section, dict):
if orch_config := orch_section.get("config"):
orchestrator_config = orch_config
logger.debug(f"Inheriting orchestrator config: {orchestrator_config}")

# Calculate self-delegation depth for child session
# Named agents reset to 0, self-delegation increments
Expand Down
71 changes: 31 additions & 40 deletions modules/tool-delegate/tests/test_delegate_model_role.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,14 @@ def _make_delegate_tool(
*,
spawn_fn=None,
agents: dict | None = None,
session_state: dict | None = None,
routing_matrix: dict | None = None,
) -> DelegateTool:
"""Create a DelegateTool with mocked coordinator for model_role testing."""
coordinator = MagicMock()
coordinator.session_id = "parent-session-123"

coordinator.config = {"agents": agents or {}}

# Session state for routing matrix availability
coordinator.session_state = session_state or {}

capabilities: dict = {
"session.spawn": spawn_fn
or AsyncMock(
Expand All @@ -46,6 +43,7 @@ def _make_delegate_tool(
"agents.list": lambda: agents or {},
"agents.get": lambda name: (agents or {}).get(name),
"self_delegation_depth": 0,
"session.routing_matrix": routing_matrix,
}

def get_capability(name):
Expand Down Expand Up @@ -134,17 +132,15 @@ async def test_model_role_resolves_against_matrix(self):
"description": "A test agent",
}
},
session_state={
"routing_matrix": {
"roles": {
"fast": {
"candidates": [
{
"provider": "anthropic",
"model": "claude-haiku-*",
},
]
}
routing_matrix={
"roles": {
"fast": {
"candidates": [
{
"provider": "anthropic",
"model": "claude-haiku-*",
},
]
}
}
},
Expand Down Expand Up @@ -205,17 +201,15 @@ async def test_provider_preferences_overrides_model_role(self):
agents={
"test-agent": {"description": "A test agent"},
},
session_state={
"routing_matrix": {
"roles": {
"fast": {
"candidates": [
{
"provider": "anthropic",
"model": "claude-haiku-3.5",
},
]
}
routing_matrix={
"roles": {
"fast": {
"candidates": [
{
"provider": "anthropic",
"model": "claude-haiku-3.5",
},
]
}
}
},
Expand Down Expand Up @@ -276,18 +270,16 @@ async def test_model_role_resolution_includes_config(self):
agents={
"coding-agent": {"description": "A coding agent"},
},
session_state={
"routing_matrix": {
"roles": {
"coding": {
"candidates": [
{
"provider": "anthropic",
"model": "claude-sonnet-4-6",
"config": {"reasoning_effort": "high"},
},
]
}
routing_matrix={
"roles": {
"coding": {
"candidates": [
{
"provider": "anthropic",
"model": "claude-sonnet-4-6",
"config": {"reasoning_effort": "high"},
},
]
}
}
},
Expand Down Expand Up @@ -340,8 +332,7 @@ async def test_model_role_without_matrix_falls_through(self):
agents={
"test-agent": {"description": "A test agent"},
},
session_state={}, # No routing_matrix
)
) # routing_matrix=None by default — no routing matrix

result = await tool.execute(
{
Expand Down
Loading