From 1aecc4bacc56ee0e4a78ea11633d8e6eb553043a Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 16 Mar 2026 03:31:12 +0000 Subject: [PATCH 1/4] Add diverge generation strategy: parallel cognitive stances with judge ranking Introduces a new agent creation method that spawns N parallel Opus calls, each seeded with a different cognitive stance (minimalist, lateral, systems, naive, adversarial), then optionally ranks them via the judge panel before registration. This produces genuinely diverse agents from a single task. - Add `stance` field to AgentRecord (models.py) + DB migration (db.py) - Add `AgentBuilder.diverge()` with parallel async generation + judge ranking - Add `mobius diverge "<task>" --n 5 --no-judge` CLI command - Add `--strategy diverge --task "<task>"` flag to `mobius bootstrap` https://claude.ai/code/session_012PH76ua8FpCuomTWXS1B9W --- src/mobius/agent_builder.py | 133 ++++++++++++++++++++++++++++++++++++ src/mobius/cli.py | 96 +++++++++++++++++++++++--- src/mobius/db.py | 8 +++ src/mobius/models.py | 1 + 4 files changed, 228 insertions(+), 10 deletions(-) diff --git a/src/mobius/agent_builder.py b/src/mobius/agent_builder.py index 86cad05..b0f6307 100644 --- a/src/mobius/agent_builder.py +++ b/src/mobius/agent_builder.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import json import logging @@ -115,6 +116,30 @@ ("security-auditor", "Security specialist that identifies vulnerabilities and proposes fixes"), ] +# Default cognitive stances for diverge generation +DEFAULT_STANCES: list[tuple[str, str]] = [ + ("minimalist", "Strip the problem to its irreducible core. Remove every unnecessary concept, constraint, and abstraction. The best solution is the one with the fewest moving parts."), + ("lateral", "Find the non-obvious or inverted approach. Reframe the problem entirely. What would a solution look like if you solved the opposite problem? 
What adjacent domain has already solved this?"), + ("systems", "Model dependencies and second-order effects. What are the feedback loops? What are the failure cascades? Think about the system the solution lives in, not just the solution itself."), + ("naive", "Question every assumption from first principles. Why does this problem exist? What if the constraints aren't real? Pretend you've never seen a solution to this class of problem before."), + ("adversarial", "Actively look for ways the task could fail or be gamed. What inputs break the solution? What edge cases are being ignored? Build the agent that would survive hostile conditions."), +] + +DIVERGE_PROMPT_TEMPLATE = """Create a specialized agent for this task, approaching it through a specific cognitive stance. + +## Task +{task} + +## Your Cognitive Stance: {stance_name} +{stance_description} + +Apply this stance deeply — it should shape the agent's entire system prompt, not just be mentioned superficially. The stance determines HOW the agent thinks about problems, what it prioritizes, and what it watches for. + +## Requirements +- The system prompt must reflect the {stance_name} stance throughout +- Make the agent genuinely different from what a generic builder would produce +- The agent should be effective at the task, not just philosophically interesting""" + def _parse_agent_json(raw: str) -> dict | list | None: """Extract agent JSON (object or array) from potentially noisy LLM output. @@ -363,6 +388,114 @@ async def bootstrap( logger.warning("Failed to create agent for: %s", spec) return agents + async def diverge( + self, + task: str, + n: int = 5, + judge: bool = True, + stances: list[tuple[str, str]] | None = None, + ) -> list[AgentRecord]: + """Spawn N parallel Opus calls, each with a different cognitive stance. + + If judge=True, evaluates the candidate system prompts via the judge + panel and returns results ranked by score (best first). + If judge=False, returns all candidates unranked. 
+ """ + stance_pool = stances or DEFAULT_STANCES + # Cycle through stances if n > len(stances) + selected_stances = [stance_pool[i % len(stance_pool)] for i in range(n)] + + async def _generate_one(stance_name: str, stance_desc: str) -> AgentRecord | None: + prompt = DIVERGE_PROMPT_TEMPLATE.format( + task=task, + stance_name=stance_name, + stance_description=stance_desc, + ) + result = await run_judge( + prompt=prompt, + system_prompt=BUILDER_SYSTEM_PROMPT, + provider_name=self.builder_provider, + model=self.builder_model, + ) + if not result.success: + logger.error("Diverge (%s) failed: %s", stance_name, result.error) + return None + + data = _parse_agent_json(result.output) + if data is None: + logger.error("Diverge (%s) could not parse output", stance_name) + return None + + try: + # Append stance to slug for uniqueness + base_slug = data.get("slug", f"diverge-{stance_name}") + return AgentRecord( + name=data["name"], + slug=f"{base_slug}-{stance_name}", + description=data["description"], + system_prompt=data["system_prompt"], + provider=data.get("provider", "anthropic"), + model=data.get("model", "claude-haiku-4-5-20251001"), + tools=data.get("tools", ["Bash", "Read", "Grep", "Glob"]), + specializations=data.get("specializations", []), + stance=stance_name, + ) + except Exception as e: + logger.error("Diverge (%s) invalid agent definition: %s", stance_name, e) + return None + + # Run all generations in parallel + tasks = [ + _generate_one(name, desc) for name, desc in selected_stances + ] + results = await asyncio.gather(*tasks, return_exceptions=True) + + candidates = [] + for r in results: + if isinstance(r, Exception): + logger.error("Diverge task exception: %s", r) + elif r is not None: + candidates.append(r) + + logger.info("Diverge produced %d/%d candidates", len(candidates), n) + + if not candidates: + return [] + + if not judge or len(candidates) < 2: + return candidates + + # Judge the candidates by evaluating their system prompts as outputs + from 
mobius.judge import JudgePanel + panel = JudgePanel(self.config) + + # Build pseudo-outputs: task = the original task, outputs = system prompts + judge_task = ( + f"Evaluate these agent system prompts designed for the following task. " + f"Judge which prompt would produce the best agent — considering specificity, " + f"actionability, coverage of edge cases, and creative approach.\n\n" + f"## Original Task\n{task}" + ) + prompt_outputs = { + agent.id: f"[Stance: {agent.stance}]\n\n{agent.system_prompt}" + for agent in candidates + } + + verdict, _ = await panel.evaluate(judge_task, prompt_outputs) + + if verdict and verdict.scores: + # Sort by score descending + score_map = verdict.scores + candidates.sort( + key=lambda a: score_map.get(a.id, 0), reverse=True + ) + logger.info( + "Diverge judge ranked candidates: %s", + [(a.slug, score_map.get(a.id, 0)) for a in candidates], + ) + + return candidates + async def scout(self, codebase_summary: str, count: int = 5) -> list[AgentRecord]: """Analyze a codebase and generate specialized agents.""" prompt = SCOUT_PROMPT_TEMPLATE.format( diff --git a/src/mobius/cli.py b/src/mobius/cli.py index 84e4ddc..263961b 100644 --- a/src/mobius/cli.py +++ b/src/mobius/cli.py @@ -158,6 +158,9 @@ def run( @app.command() def bootstrap( + strategy: str = typer.Option("default", "--strategy", "-s", help="Strategy: 'default' (sequential specializations) or 'diverge' (parallel stances)"), + task: str = typer.Option(None, "--task", "-t", help="Task description (required for --strategy diverge)"), + n: int = typer.Option(5, "--n", "-n", help="Number of agents for diverge strategy"), verbose: bool = typer.Option(False, "--verbose", "-v"), ): """Seed initial agents via the Agent Builder (Opus).""" @@ -172,17 +175,31 @@ def bootstrap( raise typer.Exit(0) builder = AgentBuilder(config) - console.print("[bold]Bootstrapping agents via Opus...[/bold]") - agents = asyncio.run(builder.bootstrap()) - for agent in agents: - # Check for slug conflict - if 
registry.get_agent_by_slug(agent.slug): - console.print(f"[yellow]Skipping {agent.slug} — already exists[/yellow]") - continue - agent.is_champion = True # First of their kind = champion - registry.create_agent(agent) - console.print(f"[green]Created: {agent.name} ({agent.provider}/{agent.model})[/green]") + if strategy == "diverge": + if not task: + console.print("[red]--task is required with --strategy diverge[/red]") + raise typer.Exit(1) + console.print(f"[bold]Bootstrapping via diverge ({n} stances)...[/bold]") + agents = asyncio.run(builder.diverge(task, n=n, judge=True)) + for i, agent in enumerate(agents): + if registry.get_agent_by_slug(agent.slug): + console.print(f"[yellow]Skipping {agent.slug} — already exists[/yellow]") + continue + agent.is_champion = (i == 0) # Judge winner is champion + registry.create_agent(agent) + label = " [green](champion)[/green]" if agent.is_champion else "" + console.print(f"[green]Created: {agent.name} ({agent.provider}/{agent.model}) stance={agent.stance}{label}[/green]") + else: + console.print("[bold]Bootstrapping agents via Opus...[/bold]") + agents = asyncio.run(builder.bootstrap()) + for agent in agents: + if registry.get_agent_by_slug(agent.slug): + console.print(f"[yellow]Skipping {agent.slug} — already exists[/yellow]") + continue + agent.is_champion = True # First of their kind = champion + registry.create_agent(agent) + console.print(f"[green]Created: {agent.name} ({agent.provider}/{agent.model})[/green]") console.print(f"\n[bold green]Bootstrapped {len(agents)} agents.[/bold green]") conn.close() @@ -330,6 +347,65 @@ def evolve( conn.close() +@app.command() +def diverge( + task: str = typer.Argument(..., help="Task description to generate diverse agents for"), + n: int = typer.Option(5, "--n", "-n", help="Number of divergent agents to generate"), + no_judge: bool = typer.Option(False, "--no-judge", help="Skip judge ranking — register all candidates"), + verbose: bool = typer.Option(False, "--verbose", "-v"), +): 
+ """Generate N agents from different cognitive stances in parallel, optionally judge-ranked. + + Each agent is seeded with a distinct thinking style (minimalist, lateral, + systems, naive, adversarial) so they approach the same task differently. + With judging enabled (default), the best prompt wins champion status. + """ + _setup_logging(verbose) + config, conn, registry, *_ = _get_components()[:3] + from mobius.agent_builder import AgentBuilder + + builder = AgentBuilder(config) + judge = not no_judge + + console.print(f"[bold]Diverging {n} agents for:[/bold] {task[:120]}") + console.print(f"[dim]Judge ranking: {'enabled' if judge else 'disabled'}[/dim]") + console.print() + + candidates = asyncio.run(builder.diverge(task, n=n, judge=judge)) + + if not candidates: + console.print("[red]No candidates generated. Check API keys and logs.[/red]") + raise typer.Exit(1) + + # Register candidates + for i, agent in enumerate(candidates): + # Deduplicate slug + if registry.get_agent_by_slug(agent.slug): + agent.slug = f"{agent.slug}-{agent.id[:6]}" + + # First candidate is champion if judged, otherwise all are candidates + if judge and i == 0: + agent.is_champion = True + + registry.create_agent(agent) + + rank = f"[green]#1 CHAMPION[/green]" if (judge and i == 0) else f"#{i+1}" + console.print( + f" {rank} [cyan]{agent.name}[/cyan] " + f"({agent.provider}/{agent.model}) " + f"stance=[bold]{agent.stance}[/bold]" + ) + console.print(f" [dim]{agent.description}[/dim]") + + console.print(f"\n[bold green]Registered {len(candidates)} divergent agents.[/bold green]") + if judge: + console.print("[dim]Winner promoted to champion. 
Others enter as challengers.[/dim]") + else: + console.print("[dim]All registered as candidates — the swarm will sort them out.[/dim]") + + conn.close() + + @app.command() def leaderboard( specialization: str = typer.Option(None, "--spec", "-s", help="Filter by specialization"), diff --git a/src/mobius/db.py b/src/mobius/db.py index e77d51d..9251afe 100644 --- a/src/mobius/db.py +++ b/src/mobius/db.py @@ -32,6 +32,7 @@ tools TEXT NOT NULL DEFAULT '[]', -- JSON array max_turns INTEGER NOT NULL DEFAULT 10, specializations TEXT NOT NULL DEFAULT '[]', -- JSON array + stance TEXT, -- cognitive stance (diverge generation) generation INTEGER NOT NULL DEFAULT 1, parent_id TEXT, is_champion INTEGER NOT NULL DEFAULT 0, @@ -144,6 +145,13 @@ def init_db(config: MobiusConfig) -> tuple[sqlite3.Connection, bool]: "INSERT INTO schema_version (version) VALUES (?)", (SCHEMA_VERSION,) ) + # Migration: add stance column if missing + try: + conn.execute("SELECT stance FROM agents LIMIT 1") + except sqlite3.OperationalError: + conn.execute("ALTER TABLE agents ADD COLUMN stance TEXT") + logger.info("Migrated: added stance column to agents") + # Migration: ensure all agents have "Bash" in their tools rows = conn.execute("SELECT id, tools FROM agents").fetchall() for row in rows: diff --git a/src/mobius/models.py b/src/mobius/models.py index ade57cc..195312a 100644 --- a/src/mobius/models.py +++ b/src/mobius/models.py @@ -33,6 +33,7 @@ class AgentRecord(BaseModel): tools: list[str] = Field(default_factory=lambda: ["Bash", "Read", "Grep", "Glob"]) max_turns: int = 10 specializations: list[str] = Field(default_factory=list) + stance: str | None = None # cognitive stance used during diverge generation generation: int = 1 parent_id: str | None = None is_champion: bool = False From b4a1e12cbd07e58f3f6c76259bd548720bb8d928 Mon Sep 17 00:00:00 2001 From: Aaron Goldsmith Date: Sun, 15 Mar 2026 22:05:17 -0700 Subject: [PATCH 2/4] Revert "Add diverge generation strategy: parallel cognitive stances 
with judge ranking" This reverts commit 1aecc4bacc56ee0e4a78ea11633d8e6eb553043a. --- src/mobius/agent_builder.py | 133 ------------------------------------ src/mobius/cli.py | 96 +++----------------------- src/mobius/db.py | 8 --- src/mobius/models.py | 1 - 4 files changed, 10 insertions(+), 228 deletions(-) diff --git a/src/mobius/agent_builder.py b/src/mobius/agent_builder.py index b0f6307..86cad05 100644 --- a/src/mobius/agent_builder.py +++ b/src/mobius/agent_builder.py @@ -2,7 +2,6 @@ from __future__ import annotations -import asyncio import json import logging @@ -116,30 +115,6 @@ ("security-auditor", "Security specialist that identifies vulnerabilities and proposes fixes"), ] -# Default cognitive stances for diverge generation -DEFAULT_STANCES: list[tuple[str, str]] = [ - ("minimalist", "Strip the problem to its irreducible core. Remove every unnecessary concept, constraint, and abstraction. The best solution is the one with the fewest moving parts."), - ("lateral", "Find the non-obvious or inverted approach. Reframe the problem entirely. What would a solution look like if you solved the opposite problem? What adjacent domain has already solved this?"), - ("systems", "Model dependencies and second-order effects. What are the feedback loops? What are the failure cascades? Think about the system the solution lives in, not just the solution itself."), - ("naive", "Question every assumption from first principles. Why does this problem exist? What if the constraints aren't real? Pretend you've never seen a solution to this class of problem before."), - ("adversarial", "Actively look for ways the task could fail or be gamed. What inputs break the solution? What edge cases are being ignored? Build the agent that would survive hostile conditions."), -] - -DIVERGE_PROMPT_TEMPLATE = """Create a specialized agent for this task, approaching it through a specific cognitive stance. 
- -## Task -{task} - -## Your Cognitive Stance: {stance_name} -{stance_description} - -Apply this stance deeply — it should shape the agent's entire system prompt, not just be mentioned superficially. The stance determines HOW the agent thinks about problems, what it prioritizes, and what it watches for. - -## Requirements -- The system prompt must reflect the {stance_name} stance throughout -- Make the agent genuinely different from what a generic builder would produce -- The agent should be effective at the task, not just philosophically interesting""" - def _parse_agent_json(raw: str) -> dict | list | None: """Extract agent JSON (object or array) from potentially noisy LLM output. @@ -388,114 +363,6 @@ async def bootstrap( logger.warning("Failed to create agent for: %s", spec) return agents - async def diverge( - self, - task: str, - n: int = 5, - judge: bool = True, - stances: list[tuple[str, str]] | None = None, - ) -> list[AgentRecord]: - """Spawn N parallel Opus calls, each with a different cognitive stance. - - If judge=True, evaluates the candidate system prompts via the judge - panel and returns results ranked by score (best first). - If judge=False, returns all candidates unranked. 
- """ - stance_pool = stances or DEFAULT_STANCES - # Cycle through stances if n > len(stances) - selected_stances = [stance_pool[i % len(stance_pool)] for i in range(n)] - - async def _generate_one(stance_name: str, stance_desc: str) -> AgentRecord | None: - prompt = DIVERGE_PROMPT_TEMPLATE.format( - task=task, - stance_name=stance_name, - stance_description=stance_desc, - ) - result = await run_judge( - prompt=prompt, - system_prompt=BUILDER_SYSTEM_PROMPT, - provider_name=self.builder_provider, - model=self.builder_model, - ) - if not result.success: - logger.error("Diverge (%s) failed: %s", stance_name, result.error) - return None - - data = _parse_agent_json(result.output) - if data is None: - logger.error("Diverge (%s) could not parse output", stance_name) - return None - - try: - # Append stance to slug for uniqueness - base_slug = data.get("slug", f"diverge-{stance_name}") - return AgentRecord( - name=data["name"], - slug=f"{base_slug}-{stance_name}", - description=data["description"], - system_prompt=data["system_prompt"], - provider=data.get("provider", "anthropic"), - model=data.get("model", "claude-haiku-4-5-20251001"), - tools=data.get("tools", ["Bash", "Read", "Grep", "Glob"]), - specializations=data.get("specializations", []), - stance=stance_name, - ) - except Exception as e: - logger.error("Diverge (%s) invalid agent definition: %s", stance_name, e) - return None - - # Run all generations in parallel - tasks = [ - _generate_one(name, desc) for name, desc in selected_stances - ] - results = await asyncio.gather(*tasks, return_exceptions=True) - - candidates = [] - for r in results: - if isinstance(r, Exception): - logger.error("Diverge task exception: %s", r) - elif r is not None: - candidates.append(r) - - logger.info("Diverge produced %d/%d candidates", len(candidates), n) - - if not candidates: - return [] - - if not judge or len(candidates) < 2: - return candidates - - # Judge the candidates by evaluating their system prompts as outputs - from 
mobius.judge import JudgePanel - panel = JudgePanel(self.config) - - # Build pseudo-outputs: task = the original task, outputs = system prompts - judge_task = ( - f"Evaluate these agent system prompts designed for the following task. " - f"Judge which prompt would produce the best agent — considering specificity, " - f"actionability, coverage of edge cases, and creative approach.\n\n" - f"## Original Task\n{task}" - ) - prompt_outputs = { - agent.id: f"[Stance: {agent.stance}]\n\n{agent.system_prompt}" - for agent in candidates - } - - verdict, _ = await panel.evaluate(judge_task, prompt_outputs) - - if verdict and verdict.scores: - # Sort by score descending - score_map = verdict.scores - candidates.sort( - key=lambda a: score_map.get(a.id, 0), reverse=True - ) - logger.info( - "Diverge judge ranked candidates: %s", - [(a.slug, score_map.get(a.id, 0)) for a in candidates], - ) - - return candidates - async def scout(self, codebase_summary: str, count: int = 5) -> list[AgentRecord]: """Analyze a codebase and generate specialized agents.""" prompt = SCOUT_PROMPT_TEMPLATE.format( diff --git a/src/mobius/cli.py b/src/mobius/cli.py index 263961b..84e4ddc 100644 --- a/src/mobius/cli.py +++ b/src/mobius/cli.py @@ -158,9 +158,6 @@ def run( @app.command() def bootstrap( - strategy: str = typer.Option("default", "--strategy", "-s", help="Strategy: 'default' (sequential specializations) or 'diverge' (parallel stances)"), - task: str = typer.Option(None, "--task", "-t", help="Task description (required for --strategy diverge)"), - n: int = typer.Option(5, "--n", "-n", help="Number of agents for diverge strategy"), verbose: bool = typer.Option(False, "--verbose", "-v"), ): """Seed initial agents via the Agent Builder (Opus).""" @@ -175,31 +172,17 @@ def bootstrap( raise typer.Exit(0) builder = AgentBuilder(config) + console.print("[bold]Bootstrapping agents via Opus...[/bold]") - if strategy == "diverge": - if not task: - console.print("[red]--task is required with --strategy 
diverge[/red]") - raise typer.Exit(1) - console.print(f"[bold]Bootstrapping via diverge ({n} stances)...[/bold]") - agents = asyncio.run(builder.diverge(task, n=n, judge=True)) - for i, agent in enumerate(agents): - if registry.get_agent_by_slug(agent.slug): - console.print(f"[yellow]Skipping {agent.slug} — already exists[/yellow]") - continue - agent.is_champion = (i == 0) # Judge winner is champion - registry.create_agent(agent) - label = " [green](champion)[/green]" if agent.is_champion else "" - console.print(f"[green]Created: {agent.name} ({agent.provider}/{agent.model}) stance={agent.stance}{label}[/green]") - else: - console.print("[bold]Bootstrapping agents via Opus...[/bold]") - agents = asyncio.run(builder.bootstrap()) - for agent in agents: - if registry.get_agent_by_slug(agent.slug): - console.print(f"[yellow]Skipping {agent.slug} — already exists[/yellow]") - continue - agent.is_champion = True # First of their kind = champion - registry.create_agent(agent) - console.print(f"[green]Created: {agent.name} ({agent.provider}/{agent.model})[/green]") + agents = asyncio.run(builder.bootstrap()) + for agent in agents: + # Check for slug conflict + if registry.get_agent_by_slug(agent.slug): + console.print(f"[yellow]Skipping {agent.slug} — already exists[/yellow]") + continue + agent.is_champion = True # First of their kind = champion + registry.create_agent(agent) + console.print(f"[green]Created: {agent.name} ({agent.provider}/{agent.model})[/green]") console.print(f"\n[bold green]Bootstrapped {len(agents)} agents.[/bold green]") conn.close() @@ -347,65 +330,6 @@ def evolve( conn.close() -@app.command() -def diverge( - task: str = typer.Argument(..., help="Task description to generate diverse agents for"), - n: int = typer.Option(5, "--n", "-n", help="Number of divergent agents to generate"), - no_judge: bool = typer.Option(False, "--no-judge", help="Skip judge ranking — register all candidates"), - verbose: bool = typer.Option(False, "--verbose", "-v"), -): 
- """Generate N agents from different cognitive stances in parallel, optionally judge-ranked. - - Each agent is seeded with a distinct thinking style (minimalist, lateral, - systems, naive, adversarial) so they approach the same task differently. - With judging enabled (default), the best prompt wins champion status. - """ - _setup_logging(verbose) - config, conn, registry, *_ = _get_components()[:3] - from mobius.agent_builder import AgentBuilder - - builder = AgentBuilder(config) - judge = not no_judge - - console.print(f"[bold]Diverging {n} agents for:[/bold] {task[:120]}") - console.print(f"[dim]Judge ranking: {'enabled' if judge else 'disabled'}[/dim]") - console.print() - - candidates = asyncio.run(builder.diverge(task, n=n, judge=judge)) - - if not candidates: - console.print("[red]No candidates generated. Check API keys and logs.[/red]") - raise typer.Exit(1) - - # Register candidates - for i, agent in enumerate(candidates): - # Deduplicate slug - if registry.get_agent_by_slug(agent.slug): - agent.slug = f"{agent.slug}-{agent.id[:6]}" - - # First candidate is champion if judged, otherwise all are candidates - if judge and i == 0: - agent.is_champion = True - - registry.create_agent(agent) - - rank = f"[green]#1 CHAMPION[/green]" if (judge and i == 0) else f"#{i+1}" - console.print( - f" {rank} [cyan]{agent.name}[/cyan] " - f"({agent.provider}/{agent.model}) " - f"stance=[bold]{agent.stance}[/bold]" - ) - console.print(f" [dim]{agent.description}[/dim]") - - console.print(f"\n[bold green]Registered {len(candidates)} divergent agents.[/bold green]") - if judge: - console.print("[dim]Winner promoted to champion. 
Others enter as challengers.[/dim]") - else: - console.print("[dim]All registered as candidates — the swarm will sort them out.[/dim]") - - conn.close() - - @app.command() def leaderboard( specialization: str = typer.Option(None, "--spec", "-s", help="Filter by specialization"), diff --git a/src/mobius/db.py b/src/mobius/db.py index 9251afe..e77d51d 100644 --- a/src/mobius/db.py +++ b/src/mobius/db.py @@ -32,7 +32,6 @@ tools TEXT NOT NULL DEFAULT '[]', -- JSON array max_turns INTEGER NOT NULL DEFAULT 10, specializations TEXT NOT NULL DEFAULT '[]', -- JSON array - stance TEXT, -- cognitive stance (diverge generation) generation INTEGER NOT NULL DEFAULT 1, parent_id TEXT, is_champion INTEGER NOT NULL DEFAULT 0, @@ -145,13 +144,6 @@ def init_db(config: MobiusConfig) -> tuple[sqlite3.Connection, bool]: "INSERT INTO schema_version (version) VALUES (?)", (SCHEMA_VERSION,) ) - # Migration: add stance column if missing - try: - conn.execute("SELECT stance FROM agents LIMIT 1") - except sqlite3.OperationalError: - conn.execute("ALTER TABLE agents ADD COLUMN stance TEXT") - logger.info("Migrated: added stance column to agents") - # Migration: ensure all agents have "Bash" in their tools rows = conn.execute("SELECT id, tools FROM agents").fetchall() for row in rows: diff --git a/src/mobius/models.py b/src/mobius/models.py index 195312a..ade57cc 100644 --- a/src/mobius/models.py +++ b/src/mobius/models.py @@ -33,7 +33,6 @@ class AgentRecord(BaseModel): tools: list[str] = Field(default_factory=lambda: ["Bash", "Read", "Grep", "Glob"]) max_turns: int = 10 specializations: list[str] = Field(default_factory=list) - stance: str | None = None # cognitive stance used during diverge generation generation: int = 1 parent_id: str | None = None is_champion: bool = False From 415f24e0e8912931b238e559fee9c72d9e5aa423 Mon Sep 17 00:00:00 2001 From: Aaron Goldsmith Date: Sun, 15 Mar 2026 22:28:48 -0700 Subject: [PATCH 3/4] Add semantic agent search via agent_vec and find_agents.py Enables 
skills (like /mobius-seed) to search the agent registry semantically before creating new agents, avoiding duplicates at scale (50-10k+ agents). Embeds agent descriptions at creation time using sentence-transformers + sqlite-vec. - Add agent_vec virtual table to db.py schema - Embed agent descriptions on create in registry.py - Add find_agents.py script for vec-based agent search - Update /mobius-seed skill to use semantic search - Pass vec_available through to Registry constructors Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/skills/mobius-seed/SKILL.md | 6 +- .../mobius-seed/scripts/create_agent.py | 4 +- .../skills/mobius-seed/scripts/find_agents.py | 94 +++++++++++++++++++ src/mobius/cli.py | 4 +- src/mobius/db.py | 12 ++- src/mobius/registry.py | 21 ++++- 6 files changed, 131 insertions(+), 10 deletions(-) create mode 100644 .claude/skills/mobius-seed/scripts/find_agents.py diff --git a/.claude/skills/mobius-seed/SKILL.md b/.claude/skills/mobius-seed/SKILL.md index 392c1a6..66747a8 100644 --- a/.claude/skills/mobius-seed/SKILL.md +++ b/.claude/skills/mobius-seed/SKILL.md @@ -24,10 +24,11 @@ The `agent_builder.py` module calls the Anthropic API (Opus) to generate agent p python -m mobius.cli init ``` -2. **Check what already exists:** +2. **Check what already exists.** Use semantic search to find agents similar to what you're about to create: ```bash -python -m mobius.cli agent list +python .claude/skills/mobius-seed/scripts/find_agents.py "description of what you want to create" --top 10 ``` +This returns JSON with the most relevant existing agents. Skip creating agents that are too similar to what's already there. 3. 
**Craft your agent definitions.** Think carefully about: - What makes a great system prompt for this specialization @@ -69,3 +70,4 @@ python -m mobius.cli agent list - **Vary approaches**: Give agents different problem-solving styles (e.g., "think step by step" vs "output code immediately") - **Be specific**: Generic prompts lose to specific ones in tournaments. "You are a Python expert who prioritizes readability" beats "You are a helpful coding assistant." - If the user gives you a codebase path, READ the codebase first and create agents tailored to its tech stack, patterns, and common tasks. +- If the user gives you a specific task instead of a specialization, create multiple agents that approach that task from genuinely different angles — vary problem-solving style, priorities, and trade-offs. diff --git a/.claude/skills/mobius-seed/scripts/create_agent.py b/.claude/skills/mobius-seed/scripts/create_agent.py index b42350e..05e0d20 100644 --- a/.claude/skills/mobius-seed/scripts/create_agent.py +++ b/.claude/skills/mobius-seed/scripts/create_agent.py @@ -42,8 +42,8 @@ def main(): sys.exit(1) config = get_config() - conn, _ = init_db(config) - registry = Registry(conn, config) + conn, vec_available = init_db(config) + registry = Registry(conn, config, vec_available) # Check for duplicates existing = registry.get_agent_by_slug(data["slug"]) diff --git a/.claude/skills/mobius-seed/scripts/find_agents.py b/.claude/skills/mobius-seed/scripts/find_agents.py new file mode 100644 index 0000000..5f03cf1 --- /dev/null +++ b/.claude/skills/mobius-seed/scripts/find_agents.py @@ -0,0 +1,94 @@ +"""Find agents semantically similar to a query. + +Usage: + python find_agents.py "build a REST API with authentication" + python find_agents.py "debug memory leaks" --top 5 + +Returns JSON array of matching agents ranked by relevance. 
+""" + +import json +import sys + +sys.path.insert(0, "src") + +from mobius.config import get_config +from mobius.db import init_db, vec_to_blob +from mobius.embedder import embed +from mobius.registry import Registry + + +def main(): + if len(sys.argv) < 2: + print("Usage: python find_agents.py '' [--top N]") + sys.exit(1) + + query = sys.argv[1] + top_k = 10 + if "--top" in sys.argv: + idx = sys.argv.index("--top") + if idx + 1 < len(sys.argv): + top_k = int(sys.argv[idx + 1]) + + config = get_config() + conn, vec_available = init_db(config) + + if not vec_available: + print(json.dumps({"error": "sqlite-vec not available"})) + sys.exit(1) + + # Check if agent_vec has any rows + count = conn.execute("SELECT COUNT(*) as cnt FROM agent_vec").fetchone()["cnt"] + if count == 0: + print(json.dumps({"error": "No agent embeddings found. Run backfill first."})) + sys.exit(1) + + # Embed query and search + query_vec = embed(query, config) + query_blob = vec_to_blob(query_vec) + + rows = conn.execute( + """ + SELECT av.id, av.distance + FROM agent_vec av + WHERE av.description_embedding MATCH ? + AND k = ? 
+ ORDER BY av.distance + """, + (query_blob, top_k), + ).fetchall() + + if not rows: + print(json.dumps([])) + conn.close() + return + + # Fetch full agent details for matches + registry = Registry(conn, config, vec_available) + results = [] + for row in rows: + agent = registry.get_agent(row["id"]) + if agent is None: + continue + distance = row["distance"] + similarity = 1.0 - (distance**2 / 2.0) + results.append({ + "slug": agent.slug, + "name": agent.name, + "description": agent.description, + "provider": agent.provider, + "model": agent.model, + "specializations": agent.specializations, + "elo": round(agent.elo_rating), + "win_rate": round(agent.win_rate, 3), + "matches": agent.total_matches, + "champion": agent.is_champion, + "similarity": round(similarity, 4), + }) + + print(json.dumps(results, indent=2)) + conn.close() + + +if __name__ == "__main__": + main() diff --git a/src/mobius/cli.py b/src/mobius/cli.py index 84e4ddc..b7ffb88 100644 --- a/src/mobius/cli.py +++ b/src/mobius/cli.py @@ -52,7 +52,7 @@ def _get_components(): config = get_config() conn, vec_available = init_db(config) - registry = Registry(conn, config) + registry = Registry(conn, config, vec_available) tournament = Tournament(conn, config, registry) memory = Memory(conn, config, vec_available) selector = Selector(registry, memory, config) @@ -78,7 +78,7 @@ def init(verbose: bool = typer.Option(False, "--verbose", "-v")): from mobius.registry import Registry from mobius.seeds import DEFAULT_AGENTS - registry = Registry(conn, config) + registry = Registry(conn, config, vec_available) seeded = 0 for agent in DEFAULT_AGENTS: if not registry.get_agent_by_slug(agent.slug): diff --git a/src/mobius/db.py b/src/mobius/db.py index e77d51d..67d4a7e 100644 --- a/src/mobius/db.py +++ b/src/mobius/db.py @@ -84,7 +84,7 @@ ); """ -# sqlite-vec virtual table created separately (requires extension) +# sqlite-vec virtual tables created separately (requires extension) VEC_TABLE_SQL = """ CREATE VIRTUAL TABLE 
IF NOT EXISTS memory_vec USING vec0( id TEXT PRIMARY KEY, @@ -92,6 +92,13 @@ ); """ +AGENT_VEC_TABLE_SQL = """ +CREATE VIRTUAL TABLE IF NOT EXISTS agent_vec USING vec0( + id TEXT PRIMARY KEY, + description_embedding FLOAT[{dim}] +); +""" + def _load_sqlite_vec(conn: sqlite3.Connection) -> bool: """Try to load the sqlite-vec extension. Returns True if successful.""" @@ -133,9 +140,10 @@ def init_db(config: MobiusConfig) -> tuple[sqlite3.Connection, bool]: # Create core schema conn.executescript(SCHEMA_SQL) - # Create vector table if extension is available + # Create vector tables if extension is available if vec_available: conn.execute(VEC_TABLE_SQL.format(dim=config.embedding_dim)) + conn.execute(AGENT_VEC_TABLE_SQL.format(dim=config.embedding_dim)) # Track schema version existing = conn.execute("SELECT version FROM schema_version").fetchone() diff --git a/src/mobius/registry.py b/src/mobius/registry.py index 50b1d38..a5c939b 100644 --- a/src/mobius/registry.py +++ b/src/mobius/registry.py @@ -8,7 +8,7 @@ from pathlib import Path from mobius.config import MobiusConfig -from mobius.db import dict_to_row, row_to_dict +from mobius.db import dict_to_row, row_to_dict, vec_to_blob from mobius.models import AgentRecord logger = logging.getLogger(__name__) @@ -17,9 +17,10 @@ class Registry: """Manages agent definitions in the database.""" - def __init__(self, conn: sqlite3.Connection, config: MobiusConfig): + def __init__(self, conn: sqlite3.Connection, config: MobiusConfig, vec_available: bool = False): self.conn = conn self.config = config + self.vec_available = vec_available def create_agent(self, agent: AgentRecord) -> AgentRecord: """Insert a new agent into the registry.""" @@ -30,10 +31,26 @@ def create_agent(self, agent: AgentRecord) -> AgentRecord: f"INSERT INTO agents ({cols}) VALUES ({placeholders})", list(row.values()), ) + + # Embed description for semantic search + if self.vec_available: + self._embed_agent(agent) + self.conn.commit() logger.info("Created 
agent: %s (%s)", agent.name, agent.slug) return agent + def _embed_agent(self, agent: AgentRecord) -> None: + """Embed an agent's description and store in agent_vec.""" + from mobius.embedder import embed + + text = f"{agent.name}: {agent.description}" + vec = embed(text, self.config) + self.conn.execute( + "INSERT OR REPLACE INTO agent_vec (id, description_embedding) VALUES (?, ?)", + (agent.id, vec_to_blob(vec)), + ) + def get_agent(self, agent_id: str) -> AgentRecord | None: """Fetch an agent by ID.""" row = self.conn.execute( From 918abde0bec8f0c58d94e6f3ac23080fcd8d183a Mon Sep 17 00:00:00 2001 From: Aaron Goldsmith Date: Sat, 21 Mar 2026 09:32:46 -0700 Subject: [PATCH 4/4] Fix PR review comments: add diverge command, validate inputs, ensure Bash in tools - Add `diverge` CLI command and `AgentBuilder.diverge()` method for generating N diverse agent variants per specialization - Validate `n >= 1` in both `bootstrap` and `diverge` commands - Validate `strategy` parameter in `bootstrap` against ("default", "diverge") - Ensure "Bash" is always present and first in tools list in diverge's _generate_one - Fix champion assignment in bootstrap diverge flow: track first actually inserted agent rather than using index-based assignment that breaks when slugs are skipped Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mobius/agent_builder.py | 59 +++++++++++++++++++++++++++++ src/mobius/cli.py | 75 ++++++++++++++++++++++++++++++++++--- 2 files changed, 129 insertions(+), 5 deletions(-) diff --git a/src/mobius/agent_builder.py b/src/mobius/agent_builder.py index 86cad05..b4f37fe 100644 --- a/src/mobius/agent_builder.py +++ b/src/mobius/agent_builder.py @@ -348,6 +348,65 @@ async def crossbreed( logger.error("Invalid crossbred agent: %s", e) return None + async def diverge( + self, + specialization: str, + description: str, + n: int = 3, + provider: ProviderType = "anthropic", + model: str | None = None, + ) -> list[AgentRecord]: + """Generate N diverse agent variants 
for a single specialization.""" + import asyncio + + async def _generate_one(index: int) -> AgentRecord | None: + prompt = f"""Create a specialized agent for: {specialization} + +Description: {description} + +The agent should use provider "{provider}" and model "{model or 'use your best judgment for the provider'}". + +This is variant #{index + 1} of {n}. Make it distinct from other possible approaches — +try a different methodology, tone, or problem-solving strategy while staying on-topic.""" + + result = await run_judge( + prompt=prompt, + system_prompt=BUILDER_SYSTEM_PROMPT, + provider_name=self.builder_provider, + model=self.builder_model, + ) + if not result.success: + logger.error("Diverge variant %d failed: %s", index, result.error) + return None + + data = _parse_agent_json(result.output) + if data is None: + return None + + try: + tools = data.get("tools", ["Bash", "Read", "Grep", "Glob"]) + # Ensure "Bash" is always present and first in the tools list + if "Bash" in tools: + tools.remove("Bash") + tools.insert(0, "Bash") + + return AgentRecord( + name=data["name"], + slug=data["slug"], + description=data["description"], + system_prompt=data["system_prompt"], + provider=data.get("provider", provider), + model=data.get("model", model or "claude-haiku-4-5-20251001"), + tools=tools, + specializations=data.get("specializations", [specialization]), + ) + except Exception as e: + logger.error("Invalid diverge variant %d: %s", index, e) + return None + + results = await asyncio.gather(*[_generate_one(i) for i in range(n)]) + return [r for r in results if r is not None] + async def bootstrap( self, ) -> list[AgentRecord]: diff --git a/src/mobius/cli.py b/src/mobius/cli.py index b7ffb88..0cfe1a3 100644 --- a/src/mobius/cli.py +++ b/src/mobius/cli.py @@ -158,10 +158,21 @@ def run( @app.command() def bootstrap( + n: int = typer.Option(3, "--agents", "-n", help="Number of agent variants per specialization (diverge mode)"), + strategy: str = typer.Option("default", 
"--strategy", "-s", help="Bootstrap strategy: 'default' or 'diverge'"), verbose: bool = typer.Option(False, "--verbose", "-v"), ): """Seed initial agents via the Agent Builder (Opus).""" _setup_logging(verbose) + + if n < 1: + console.print("[red]Error: --agents/-n must be >= 1.[/red]") + raise typer.Exit(1) + + if strategy not in ("default", "diverge"): + console.print(f"[red]Error: unknown strategy '{strategy}'. Must be 'default' or 'diverge'.[/red]") + raise typer.Exit(1) + config, conn, registry, *_ = _get_components()[:3] from mobius.agent_builder import AgentBuilder @@ -172,19 +183,73 @@ def bootstrap( raise typer.Exit(0) builder = AgentBuilder(config) - console.print("[bold]Bootstrapping agents via Opus...[/bold]") - agents = asyncio.run(builder.bootstrap()) + if strategy == "diverge": + from mobius.agent_builder import BOOTSTRAP_SPECIALIZATIONS + + console.print(f"[bold]Bootstrapping agents via diverge (n={n})...[/bold]") + champion_assigned = False + for spec, desc in BOOTSTRAP_SPECIALIZATIONS: + console.print(f"\n[bold]Diverging: {spec}[/bold]") + agents = asyncio.run(builder.diverge(spec, desc, n=n)) + for agent in agents: + if registry.get_agent_by_slug(agent.slug): + console.print(f"[yellow]Skipping {agent.slug} — already exists[/yellow]") + continue + if not champion_assigned: + agent.is_champion = True + champion_assigned = True + registry.create_agent(agent) + console.print(f"[green]Created: {agent.name} ({agent.provider}/{agent.model})[/green]") + else: + console.print("[bold]Bootstrapping agents via Opus...[/bold]") + agents = asyncio.run(builder.bootstrap()) + for agent in agents: + if registry.get_agent_by_slug(agent.slug): + console.print(f"[yellow]Skipping {agent.slug} — already exists[/yellow]") + continue + agent.is_champion = True + registry.create_agent(agent) + console.print(f"[green]Created: {agent.name} ({agent.provider}/{agent.model})[/green]") + + console.print(f"\n[bold green]Bootstrap complete.[/bold green]") + conn.close() + + 
@app.command()
def diverge(
    specialization: str = typer.Argument(..., help="Specialization to generate variants for"),
    description: str = typer.Option("", "--desc", "-d", help="Description of the specialization"),
    n: int = typer.Option(3, "--agents", "-n", help="Number of variants to generate"),
    verbose: bool = typer.Option(False, "--verbose", "-v"),
):
    """Generate N diverse agent variants for a single specialization.

    Variants whose slug already exists in the registry are skipped; the
    first variant that is actually registered is marked as champion.
    """
    _setup_logging(verbose)

    # Validate before opening the database so a bad flag never creates a
    # connection that would then need cleaning up.
    if n < 1:
        console.print("[red]Error: --agents/-n must be >= 1.[/red]")
        raise typer.Exit(1)

    # Only the first three components are needed by this command.
    config, conn, registry = _get_components()[:3]
    try:
        from mobius.agent_builder import AgentBuilder

        builder = AgentBuilder(config)
        console.print(f"[bold]Generating {n} variants for '{specialization}'...[/bold]")

        # An empty --desc falls back to the specialization name itself.
        agents = asyncio.run(
            builder.diverge(specialization, description or specialization, n=n)
        )

        if not agents:
            console.print("[yellow]No variants were generated.[/yellow]")

        registered = 0
        skipped = 0
        for agent in agents:
            if registry.get_agent_by_slug(agent.slug):
                console.print(f"[yellow]Skipping {agent.slug} — already exists[/yellow]")
                skipped += 1
                continue
            # Champion status follows the first variant that is really inserted,
            # not the first one generated, so skipped slugs cannot swallow it.
            # NOTE(review): no check is made for a champion that may already
            # exist in the registry for this specialization — confirm intended.
            if registered == 0:
                agent.is_champion = True
            registry.create_agent(agent)
            registered += 1
            console.print(f"[green]Created: {agent.name} ({agent.provider}/{agent.model})[/green]")

        # Report what was stored, not just what the builder returned.
        console.print(
            f"\n[bold green]Generated {len(agents)} variant(s); "
            f"registered {registered}, skipped {skipped}.[/bold green]"
        )
    finally:
        # Close the connection even when generation or registration raises.
        conn.close()