diff --git a/code_review_graph/search.py b/code_review_graph/search.py index f172ea98..b262bf03 100644 --- a/code_review_graph/search.py +++ b/code_review_graph/search.py @@ -313,6 +313,7 @@ def hybrid_search( context_files: Optional[list[str]] = None, model: Optional[str] = None, provider: Optional[str] = None, + _out_mode: Optional[list[str]] = None, ) -> list[dict[str, Any]]: """Hybrid search combining FTS5 BM25 and vector embeddings via RRF. @@ -326,11 +327,18 @@ def hybrid_search( limit: Maximum results to return (default 20). context_files: Optional list of file paths. Nodes in these files receive a 1.5x score boost. + _out_mode: Optional output list. If provided, a single string is + appended indicating which search path(s) contributed: + ``"hybrid"`` (FTS + embeddings), ``"fts"`` (FTS only), + ``"semantic"`` (embeddings only), ``"keyword"`` (LIKE fallback), + or ``"none"`` (empty query, or all search paths returned 0 results). Returns: List of dicts with node metadata and ``score`` field. """ if not query or not query.strip(): + if _out_mode is not None: + _out_mode.append("none") return [] # NOTE: hybrid_search uses store._conn for FTS5 and keyword queries @@ -362,11 +370,22 @@ def hybrid_search( if emb_results: lists_to_merge.append(emb_results) merged = rrf_merge(*lists_to_merge) + if _out_mode is not None: + if fts_results and emb_results: + _out_mode.append("hybrid") + elif fts_results: + _out_mode.append("fts") + else: + _out_mode.append("semantic") else: # Fallback: keyword LIKE matching keyword_results = _keyword_search(conn, query, limit=fetch_limit) if not keyword_results: + if _out_mode is not None: + _out_mode.append("none") return [] + if _out_mode is not None: + _out_mode.append("keyword") merged = keyword_results # ------ Phase 3+4: Batch-fetch nodes, apply boosting and kind filter ------ diff --git a/code_review_graph/tools/query.py b/code_review_graph/tools/query.py index 3b442f8a..1f63215c 100644 --- a/code_review_graph/tools/query.py +++ b/code_review_graph/tools/query.py @@ -403,14 +403,13 @@ def semantic_search_nodes( """ store, root = _get_store(repo_root) try: + mode_out: list[str] = [] results = hybrid_search( store, query, kind=kind, limit=limit, context_files=context_files, - model=model, provider=provider, + model=model, provider=provider, _out_mode=mode_out, ) - search_mode = "hybrid" - if not results: - search_mode = "keyword" + search_mode = mode_out[0] if mode_out else "keyword" summary = f"Found {len(results)} node(s) matching '{query}'" + ( f" (kind={kind})" if kind else "" diff --git a/tests/test_search.py b/tests/test_search.py index e7d90675..e98e4eac 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -249,6 +249,76 @@ def test_fts_query_with_special_chars(self): # Just assert no exception was raised assert isinstance(results, list) + # --- _out_mode tracking --- + + def test_out_mode_fts_only(self): + """_out_mode is 'fts' when only FTS contributes (no embeddings).""" + rebuild_fts_index(self.store) + out: list[str] = [] + results = hybrid_search(self.store, "authenticate", _out_mode=out) + assert out == ["fts"] + assert len(results) > 0 + + def test_out_mode_keyword(self): + """_out_mode is 'keyword' when FTS table is absent and no embeddings.""" + self.store._conn.execute("DROP TABLE IF EXISTS nodes_fts") + self.store._conn.commit() + out: list[str] = [] + results = hybrid_search(self.store, "authenticate", _out_mode=out) + assert out == ["keyword"] + assert len(results) > 0 + + def test_out_mode_keyword_no_results(self): + """_out_mode is 'none' when keyword fallback also returns 0 results.""" + self.store._conn.execute("DROP TABLE IF EXISTS nodes_fts") + self.store._conn.commit() + out: list[str] = [] + results = hybrid_search(self.store, "xyzzy_nonexistent_abc123", _out_mode=out) + assert results == [] + assert out == ["none"] + + def test_out_mode_semantic(self, monkeypatch): + """_out_mode is 'semantic' when only embeddings contribute.""" + import code_review_graph.search as search_mod + + node_id = self.store._conn.execute( + "SELECT id FROM nodes WHERE name = 'authenticate'" + ).fetchone()[0] + + def fake_emb(store, query, limit=50, model=None, provider=None): + return [(node_id, 0.9)] + + monkeypatch.setattr(search_mod, "_embedding_search", fake_emb) + out: list[str] = [] + results = hybrid_search(self.store, "authenticate", _out_mode=out) + assert out == ["semantic"] + assert len(results) > 0 + + def test_out_mode_hybrid(self, monkeypatch): + """_out_mode is 'hybrid' when both FTS and embeddings contribute.""" + import code_review_graph.search as search_mod + + rebuild_fts_index(self.store) + node_id = self.store._conn.execute( + "SELECT id FROM nodes WHERE name = 'authenticate'" + ).fetchone()[0] + + def fake_emb(store, query, limit=50, model=None, provider=None): + return [(node_id, 0.9)] + + monkeypatch.setattr(search_mod, "_embedding_search", fake_emb) + out: list[str] = [] + results = hybrid_search(self.store, "authenticate", _out_mode=out) + assert out == ["hybrid"] + assert len(results) > 0 + + def test_out_mode_empty_query(self): + """_out_mode is 'none' for empty queries (no search ran).""" + out: list[str] = [] + results = hybrid_search(self.store, "", _out_mode=out) + assert results == [] + assert out == ["none"] + def test_fts_rebuild_is_atomic(self): """Regression test for #259: rebuild_fts_index must wrap the DROP + CREATE + INSERT sequence in a single transaction so a crash between diff --git a/tests/test_tools.py b/tests/test_tools.py index 578536d4..d3e79298 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -146,6 +146,26 @@ def test_multi_word_search(self): names = {r.name for r in results} assert "login" in names or "AuthService" in names + def test_search_mode_fts(self, monkeypatch, tmp_path): + """semantic_search_nodes reports search_mode='fts' when only FTS contributes.""" + import code_review_graph.tools.query as query_mod + from code_review_graph.search import rebuild_fts_index + from code_review_graph.tools.query import semantic_search_nodes + + tmp_db = tmp_path / "test.db" + store = GraphStore(tmp_db) + store.upsert_node(NodeInfo( + kind="Function", name="login", file_path="/repo/auth.py", + line_start=1, line_end=10, language="python", + )) + store.commit() + rebuild_fts_index(store) + + monkeypatch.setattr(query_mod, "_get_store", lambda repo_root=None: (store, tmp_path)) + result = semantic_search_nodes("login") + assert result["status"] == "ok" + assert result["search_mode"] == "fts" + def test_search_edges_by_target_name(self): """Search for edges by unqualified target name.""" # Add an edge with bare target name