diff --git a/.claude/skills/mobius-judge/scripts/record_verdict.py b/.claude/skills/mobius-judge/scripts/record_verdict.py index 20616f5..1396348 100644 --- a/.claude/skills/mobius-judge/scripts/record_verdict.py +++ b/.claude/skills/mobius-judge/scripts/record_verdict.py @@ -15,7 +15,10 @@ sys.path.insert(0, "src") from mobius.config import get_config -from mobius.db import init_db, row_to_dict +from mobius.db import init_db, row_to_dict, vec_to_blob +from mobius.embedder import embed +from mobius.memory import Memory +from mobius.models import MemoryEntry from mobius.registry import Registry from mobius.tournament import Tournament @@ -39,9 +42,10 @@ def main(): reasoning = args[2] config = get_config() - conn, _ = init_db(config) + conn, vec_available = init_db(config) registry = Registry(conn, config) tournament = Tournament(conn, config, registry) + memory = Memory(conn, config, vec_available) # Get the match if match_id: @@ -122,6 +126,32 @@ def main(): conn.commit() + # Store in vector memory so future selections benefit. + # Only attempt when vec is available -- without it Memory.store() inserts + # into the memory table but cannot write the embedding to memory_vec, + # making the entry unsearchable. NOTE(review): the score stored below is max(scores.values()), which is the winner's score only if the winner scored highest -- confirm that is intended. + task_text = match.get("task_description", "") + if vec_available and task_text and full_winner_id: + try: + # Reuse the embedding already stored on the match row when present, + # avoiding a redundant embed() call. 
+ existing_blob = match.get("task_embedding") + if existing_blob: + task_blob = existing_blob if isinstance(existing_blob, bytes) else bytes(existing_blob) + else: + task_vec = embed(task_text, config) + task_blob = vec_to_blob(task_vec) + + memory_entry = MemoryEntry( + task_embedding=task_blob, + task_text=task_text, + winning_agent_id=full_winner_id, + score=max(scores.values()) if scores else 0.0, + ) + memory.store(memory_entry) + except Exception as e: + print(f"Warning: failed to store memory entry: {e}", file=sys.stderr) + # Print results winner = agents_by_id.get(full_winner_id) print(f"Verdict recorded for match {mid[:8]}") diff --git a/src/mobius/config.py b/src/mobius/config.py index f90fbba..79a4d33 100644 --- a/src/mobius/config.py +++ b/src/mobius/config.py @@ -41,8 +41,8 @@ class MobiusConfig(BaseModel): embedding_model: str = "all-MiniLM-L6-v2" embedding_dim: int = 384 memory_top_k: int = 5 - similarity_specialist_threshold: float = 0.9 - similarity_ensemble_threshold: float = 0.7 + similarity_specialist_threshold: float = 0.5 + similarity_ensemble_threshold: float = 0.3 # Self-improvement max_agent_population: int = 50 diff --git a/src/mobius/memory.py b/src/mobius/memory.py index 8f42a4b..1378485 100644 --- a/src/mobius/memory.py +++ b/src/mobius/memory.py @@ -35,7 +35,18 @@ def __init__( self.vec_available = vec_available def store(self, entry: MemoryEntry) -> None: - """Store a task outcome in memory.""" + """Store a task outcome in memory, skipping it when a row with the same task_text and winning_agent_id already exists.""" + existing = self.conn.execute( + "SELECT id FROM memory WHERE task_text = ? 
AND winning_agent_id = ?", + (entry.task_text, entry.winning_agent_id), + ).fetchone() + if existing: + logger.debug( + "Duplicate memory entry for agent %s on task, skipping", + entry.winning_agent_id, + ) + return + row = dict_to_row(entry.model_dump(exclude={"task_embedding"})) cols = ", ".join(row.keys()) placeholders = ", ".join(["?"] * len(row)) diff --git a/tests/test_skill_scripts.py b/tests/test_skill_scripts.py index 6a180e5..a2e9ec6 100644 --- a/tests/test_skill_scripts.py +++ b/tests/test_skill_scripts.py @@ -186,6 +186,53 @@ def test_elo_display_in_output(self, setup): assert "Alpha" in output assert "Beta" in output + def test_memory_store_skipped_when_vec_unavailable(self, setup): + """Memory persistence is guarded by vec_available; when False no memory row is created.""" + config, conn, registry, tournament, a1, a2, match = setup + scores = json.dumps({a1.id: 28.0, a2.id: 22.0}) + + # _run_script already patches init_db to return vec_available=False, + # so Memory.store should never be called. + self._run(conn, [a1.id, scores, "Alpha wins"]) + + mem_count = conn.execute("SELECT COUNT(*) as cnt FROM memory").fetchone()[0] + assert mem_count == 0, "No memory rows should be inserted when vec_available is False" + + def test_memory_store_reuses_existing_embedding(self, setup): + """When the match row already has a task_embedding blob, embed() is not called again.""" + config, conn, registry, tournament, a1, a2, match = setup + + # Inject a fake embedding blob into the match row. NOTE(review): the blob holds 256 floats while MobiusConfig.embedding_dim defaults to 384 -- confirm the dimension the test config expects. + import numpy as np + from mobius.db import vec_to_blob + fake_blob = vec_to_blob(np.ones(256, dtype=np.float32)) + conn.execute("UPDATE matches SET task_embedding = ? 
WHERE id = ?", (fake_blob, match.id)) + conn.commit() + + scores = json.dumps({a1.id: 28.0, a2.id: 22.0}) + + # Patch init_db to return vec_available=True and mock embed to track calls + from unittest.mock import patch as _patch, MagicMock + embed_mock = MagicMock() + with _patch("mobius.db.init_db", return_value=(_UnclosableConn(conn), True)), _patch("mobius.config.get_config", return_value=_CONFIG): + # Load the script as a fresh module with vec_available=True. NOTE(review): init_db is patched again in the inner `with` below, so this outer init_db patch looks redundant -- confirm. + import importlib.util, sys as _sys + spec = importlib.util.spec_from_file_location( + "record_verdict", + ".claude/skills/mobius-judge/scripts/record_verdict.py", + ) + mod = importlib.util.module_from_spec(spec) + wrapped = _UnclosableConn(conn) + with _patch.object(_sys, "argv", ["record_verdict.py", a1.id, scores, "Alpha wins"]), _patch("mobius.db.init_db", return_value=(wrapped, True)), _patch("mobius.embedder.embed", embed_mock): + spec.loader.exec_module(mod) + from io import StringIO + captured = StringIO() + with _patch("sys.stdout", captured): + mod.main() + + # embed() should NOT have been called since match had an existing blob + embed_mock.assert_not_called() + # --------------------------------------------------------------------------- # load_match tests