Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions .claude/skills/mobius-judge/scripts/record_verdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
sys.path.insert(0, "src")

from mobius.config import get_config
from mobius.db import init_db, row_to_dict
from mobius.db import init_db, row_to_dict, vec_to_blob
from mobius.embedder import embed
from mobius.memory import Memory
from mobius.models import MemoryEntry
from mobius.registry import Registry
from mobius.tournament import Tournament

Expand All @@ -39,9 +42,10 @@ def main():
reasoning = args[2]

config = get_config()
conn, _ = init_db(config)
conn, vec_available = init_db(config)
registry = Registry(conn, config)
tournament = Tournament(conn, config, registry)
memory = Memory(conn, config, vec_available)

# Get the match
if match_id:
Expand Down Expand Up @@ -122,6 +126,32 @@ def main():

conn.commit()

# Store in vector memory so future selections benefit.
# Only attempt when vec is available -- without it Memory.store() inserts
# into the memory table but cannot write the embedding to memory_vec,
# making the entry unsearchable.
task_text = match.get("task_description", "")
if vec_available and task_text and full_winner_id:
try:
# Reuse the embedding already stored on the match row when present,
# avoiding a redundant embed() call.
existing_blob = match.get("task_embedding")
if existing_blob:
task_blob = existing_blob if isinstance(existing_blob, bytes) else bytes(existing_blob)
else:
task_vec = embed(task_text, config)
task_blob = vec_to_blob(task_vec)

memory_entry = MemoryEntry(
task_embedding=task_blob,
task_text=task_text,
winning_agent_id=full_winner_id,
score=max(scores.values()) if scores else 0.0,
)
memory.store(memory_entry)
Comment thread
AaronGoldsmith marked this conversation as resolved.
except Exception as e:
print(f"Warning: failed to store memory entry: {e}", file=sys.stderr)

# Print results
winner = agents_by_id.get(full_winner_id)
print(f"Verdict recorded for match {mid[:8]}")
Expand Down
4 changes: 2 additions & 2 deletions src/mobius/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ class MobiusConfig(BaseModel):
embedding_model: str = "all-MiniLM-L6-v2"
embedding_dim: int = 384
memory_top_k: int = 5
similarity_specialist_threshold: float = 0.9
similarity_ensemble_threshold: float = 0.7
similarity_specialist_threshold: float = 0.5
similarity_ensemble_threshold: float = 0.3

# Self-improvement
max_agent_population: int = 50
Expand Down
13 changes: 12 additions & 1 deletion src/mobius/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,18 @@ def __init__(
self.vec_available = vec_available

def store(self, entry: MemoryEntry) -> None:
"""Store a task outcome in memory."""
"""Store a task outcome in memory, skipping duplicates."""
existing = self.conn.execute(
"SELECT id FROM memory WHERE task_text = ? AND winning_agent_id = ?",
(entry.task_text, entry.winning_agent_id),
).fetchone()
if existing:
logger.debug(
"Duplicate memory entry for agent %s on task, skipping",
entry.winning_agent_id,
)
return

row = dict_to_row(entry.model_dump(exclude={"task_embedding"}))
cols = ", ".join(row.keys())
placeholders = ", ".join(["?"] * len(row))
Expand Down
47 changes: 47 additions & 0 deletions tests/test_skill_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,53 @@ def test_elo_display_in_output(self, setup):
assert "Alpha" in output
assert "Beta" in output

def test_memory_store_skipped_when_vec_unavailable(self, setup):
    """Recording a verdict without vector support must leave ``memory`` empty.

    The script only persists a memory entry when ``vec_available`` is true,
    so running it with the flag off should insert no rows at all.
    """
    _config, conn, _registry, _tournament, a1, a2, _match = setup
    score_map = {a1.id: 28.0, a2.id: 22.0}
    scores_json = json.dumps(score_map)

    # Per the original note, _run_script already patches init_db to return
    # vec_available=False, so Memory.store should never be reached here.
    cli_args = [a1.id, scores_json, "Alpha wins"]
    self._run(conn, cli_args)

    count_row = conn.execute("SELECT COUNT(*) as cnt FROM memory").fetchone()
    stored_rows = count_row[0]
    assert stored_rows == 0, "No memory rows should be inserted when vec_available is False"

def test_memory_store_reuses_existing_embedding(self, setup):
    """When the match row already has a task_embedding blob, embed() is not called again.

    The script is loaded from its file path and run with ``init_db`` patched
    to report ``vec_available=True`` so that the memory-persistence branch is
    taken. ``embed`` is replaced by a mock purely to observe whether the
    script falls back to computing a fresh embedding.
    """
    import importlib.util
    import sys as _sys
    from io import StringIO
    from unittest.mock import MagicMock, patch as _patch

    import numpy as np
    from mobius.db import vec_to_blob

    config, conn, registry, tournament, a1, a2, match = setup

    # Inject a fake embedding blob into the match row so the script has
    # something to reuse instead of calling embed().
    fake_blob = vec_to_blob(np.ones(256, dtype=np.float32))
    conn.execute("UPDATE matches SET task_embedding = ? WHERE id = ?", (fake_blob, match.id))
    conn.commit()

    scores = json.dumps({a1.id: 28.0, a2.id: 22.0})

    embed_mock = MagicMock()
    captured = StringIO()
    # A single wrapper is enough; the script receives it through the patched
    # init_db below.
    wrapped = _UnclosableConn(conn)

    spec = importlib.util.spec_from_file_location(
        "record_verdict",
        ".claude/skills/mobius-judge/scripts/record_verdict.py",
    )
    mod = importlib.util.module_from_spec(spec)

    # The patches must be active while the module body executes, because the
    # script binds init_db / get_config / embed with ``from ... import`` at
    # import time. Each target is patched exactly once.
    with _patch.object(_sys, "argv", ["record_verdict.py", a1.id, scores, "Alpha wins"]), \
            _patch("mobius.db.init_db", return_value=(wrapped, True)), \
            _patch("mobius.config.get_config", return_value=_CONFIG), \
            _patch("mobius.embedder.embed", embed_mock):
        spec.loader.exec_module(mod)
        with _patch("sys.stdout", captured):
            mod.main()

    # Guard against a vacuous pass: the confirmation line is printed after
    # the memory-persistence step, so its presence shows main() ran past it.
    assert "Verdict recorded" in captured.getvalue()

    # embed() should NOT have been called since match had an existing blob
    embed_mock.assert_not_called()


# ---------------------------------------------------------------------------
# load_match tests
Expand Down
Loading