Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions code_review_graph/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ def hybrid_search(
context_files: Optional[list[str]] = None,
model: Optional[str] = None,
provider: Optional[str] = None,
_out_mode: Optional[list[str]] = None,
) -> list[dict[str, Any]]:
"""Hybrid search combining FTS5 BM25 and vector embeddings via RRF.

Expand All @@ -326,11 +327,18 @@ def hybrid_search(
limit: Maximum results to return (default 20).
context_files: Optional list of file paths. Nodes in these files
receive a 1.5x score boost.
_out_mode: Optional output list. If provided, a single string is
appended indicating which search path(s) contributed:
``"hybrid"`` (FTS + embeddings), ``"fts"`` (FTS only),
``"semantic"`` (embeddings only), ``"keyword"`` (LIKE fallback),
or ``"none"`` (empty query, or all search paths returned 0 results).

Returns:
List of dicts with node metadata and ``score`` field.
"""
if not query or not query.strip():
if _out_mode is not None:
_out_mode.append("none")
return []

# NOTE: hybrid_search uses store._conn for FTS5 and keyword queries
Expand Down Expand Up @@ -362,11 +370,22 @@ def hybrid_search(
if emb_results:
lists_to_merge.append(emb_results)
merged = rrf_merge(*lists_to_merge)
if _out_mode is not None:
if fts_results and emb_results:
_out_mode.append("hybrid")
elif fts_results:
_out_mode.append("fts")
else:
_out_mode.append("semantic")
else:
# Fallback: keyword LIKE matching
keyword_results = _keyword_search(conn, query, limit=fetch_limit)
if not keyword_results:
if _out_mode is not None:
_out_mode.append("none")
return []
if _out_mode is not None:
_out_mode.append("keyword")
merged = keyword_results

# ------ Phase 3+4: Batch-fetch nodes, apply boosting and kind filter ------
Expand Down
7 changes: 3 additions & 4 deletions code_review_graph/tools/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,14 +403,13 @@ def semantic_search_nodes(
"""
store, root = _get_store(repo_root)
try:
mode_out: list[str] = []
results = hybrid_search(
store, query, kind=kind, limit=limit, context_files=context_files,
model=model, provider=provider,
model=model, provider=provider, _out_mode=mode_out,
)

search_mode = "hybrid"
if not results:
search_mode = "keyword"
search_mode = mode_out[0] if mode_out else "keyword"

summary = f"Found {len(results)} node(s) matching '{query}'" + (
f" (kind={kind})" if kind else ""
Expand Down
70 changes: 70 additions & 0 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,76 @@ def test_fts_query_with_special_chars(self):
# Just assert no exception was raised
assert isinstance(results, list)

# --- _out_mode tracking ---

def test_out_mode_fts_only(self):
    """The reported mode is ``"fts"`` when FTS is the sole contributor.

    The FTS index is rebuilt first; no embedding results are supplied
    in this test, so only the FTS path can yield matches.
    """
    rebuild_fts_index(self.store)
    mode_sink: list[str] = []
    hits = hybrid_search(self.store, "authenticate", _out_mode=mode_sink)
    # Exactly one mode string must have been appended to the out-list.
    assert mode_sink == ["fts"]
    assert len(hits) > 0

def test_out_mode_keyword(self):
    """The reported mode is ``"keyword"`` once the FTS table is gone.

    Dropping ``nodes_fts`` (with no embeddings supplied) leaves the
    LIKE-based keyword fallback as the path that produces results.
    """
    conn = self.store._conn
    conn.execute("DROP TABLE IF EXISTS nodes_fts")
    conn.commit()

    mode_sink: list[str] = []
    hits = hybrid_search(self.store, "authenticate", _out_mode=mode_sink)
    assert mode_sink == ["keyword"]
    assert len(hits) > 0

def test_out_mode_keyword_no_results(self):
    """The reported mode is ``"none"`` if the keyword fallback finds nothing."""
    conn = self.store._conn
    conn.execute("DROP TABLE IF EXISTS nodes_fts")
    conn.commit()

    mode_sink: list[str] = []
    # A query string that is not expected to match any stored node.
    hits = hybrid_search(
        self.store, "xyzzy_nonexistent_abc123", _out_mode=mode_sink
    )
    assert hits == []
    assert mode_sink == ["none"]

def test_out_mode_semantic(self, monkeypatch):
    """The reported mode is ``"semantic"`` when only embeddings match."""
    import code_review_graph.search as search_mod

    lookup_sql = "SELECT id FROM nodes WHERE name = 'authenticate'"
    row = self.store._conn.execute(lookup_sql).fetchone()
    target_id = row[0]

    def stub_embedding_search(store, query, limit=50, model=None, provider=None):
        # Always report a single embedding hit for the known node.
        return [(target_id, 0.9)]

    monkeypatch.setattr(search_mod, "_embedding_search", stub_embedding_search)

    mode_sink: list[str] = []
    hits = hybrid_search(self.store, "authenticate", _out_mode=mode_sink)
    assert mode_sink == ["semantic"]
    assert len(hits) > 0

def test_out_mode_hybrid(self, monkeypatch):
    """The reported mode is ``"hybrid"`` when FTS and embeddings both match."""
    import code_review_graph.search as search_mod

    # Make the FTS path productive before stubbing the embedding path.
    rebuild_fts_index(self.store)

    lookup_sql = "SELECT id FROM nodes WHERE name = 'authenticate'"
    row = self.store._conn.execute(lookup_sql).fetchone()
    target_id = row[0]

    def stub_embedding_search(store, query, limit=50, model=None, provider=None):
        # Always report a single embedding hit for the known node.
        return [(target_id, 0.9)]

    monkeypatch.setattr(search_mod, "_embedding_search", stub_embedding_search)

    mode_sink: list[str] = []
    hits = hybrid_search(self.store, "authenticate", _out_mode=mode_sink)
    assert mode_sink == ["hybrid"]
    assert len(hits) > 0

def test_out_mode_empty_query(self):
    """An empty query yields no results and a reported mode of ``"none"``."""
    mode_sink: list[str] = []
    hits = hybrid_search(self.store, "", _out_mode=mode_sink)
    assert hits == []
    assert mode_sink == ["none"]

def test_fts_rebuild_is_atomic(self):
"""Regression test for #259: rebuild_fts_index must wrap the DROP +
CREATE + INSERT sequence in a single transaction so a crash between
Expand Down
20 changes: 20 additions & 0 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,26 @@ def test_multi_word_search(self):
names = {r.name for r in results}
assert "login" in names or "AuthService" in names

def test_search_mode_fts(self, monkeypatch, tmp_path):
    """``semantic_search_nodes`` reports ``search_mode == "fts"`` for an FTS-only hit.

    A throwaway store holding one ``login`` function is created under
    ``tmp_path``, its FTS index is rebuilt, and the tool's ``_get_store``
    is patched so that the tool operates on that store.
    """
    import code_review_graph.tools.query as query_mod
    from code_review_graph.search import rebuild_fts_index
    from code_review_graph.tools.query import semantic_search_nodes

    db_path = tmp_path / "test.db"
    store = GraphStore(db_path)
    login_node = NodeInfo(
        kind="Function",
        name="login",
        file_path="/repo/auth.py",
        line_start=1,
        line_end=10,
        language="python",
    )
    store.upsert_node(login_node)
    store.commit()
    rebuild_fts_index(store)

    def fake_get_store(repo_root=None):
        # Hand the tool the prepared store regardless of the requested root.
        return (store, tmp_path)

    monkeypatch.setattr(query_mod, "_get_store", fake_get_store)
    response = semantic_search_nodes("login")
    assert response["status"] == "ok"
    assert response["search_mode"] == "fts"

def test_search_edges_by_target_name(self):
"""Search for edges by unqualified target name."""
# Add an edge with bare target name
Expand Down