diff --git a/src/functions/smart-search.ts b/src/functions/smart-search.ts
index 10a0a77c..866a35f1 100644
--- a/src/functions/smart-search.ts
+++ b/src/functions/smart-search.ts
@@ -72,6 +72,60 @@ export function resetFollowupStatsForTests(): void {
 // full content is fetched via memory_lesson_recall when the caller needs it.
 const LESSON_CONTENT_PREVIEW_CHARS = 240;
 
+// v4-B: detect "who is X" / "what is X" / "what does X mean" patterns
+// and pull out X so we can boost hits that name the concept directly.
+// BM25 already rewards the exact phrase, but for short named-concept
+// queries (typically 2–4 tokens) the question scaffolding ("who is the")
+// adds noise that depresses true matches relative to broader, busier
+// observations. This is the v4-A "careful generator" regression in
+// docs/plans/v4-lineage-test-case-careful-generator.md, surfacing in
+// smart-search rather than lineage.
+const NAMED_CONCEPT_PATTERNS: readonly RegExp[] = [
+  // "who is X" / "who's X" / "what is X" / "what's X", optional article.
+  /^\s*(?:who|what)(?:'s|\s+is)\s+(?:the\s+|a\s+|an\s+)?(.+?)\s*\??\s*$/i,
+  // "what does X mean"
+  /^\s*what\s+does\s+(.+?)\s+mean\s*\??\s*$/i,
+];
+
+// Pronouns/deictics that fit the question shape but name no concept
+// ("what is this?"). The length check alone only rejects phrases of
+// one or two characters such as "it".
+const NON_CONCEPT_PHRASES: ReadonlySet<string> = new Set([
+  "it", "he", "she", "they", "this", "that", "these", "those", "here", "there",
+]);
+
+/**
+ * Pull the named concept X out of a "who is X" / "what is X" /
+ * "what does X mean" style query.
+ *
+ * @param query Raw user query.
+ * @returns The concept phrase with question scaffolding, a leading
+ *   article and trailing punctuation removed, or null when the query
+ *   is not a named-concept question or the phrase is degenerate
+ *   (under 3 characters, over 6 tokens, or a bare pronoun).
+ */
+export function extractNamedConcept(query: string): string | null {
+  if (!query) return null;
+  for (const re of NAMED_CONCEPT_PATTERNS) {
+    const m = re.exec(query);
+    if (!m?.[1]) continue;
+    const phrase = m[1].trim().replace(/[?.!]+$/, "").trim();
+    // Skip degenerate matches: too short, too long, or a bare pronoun.
+    const tokenCount = phrase.split(/\s+/).length;
+    if (
+      phrase.length >= 3 &&
+      tokenCount <= 6 &&
+      !NON_CONCEPT_PHRASES.has(phrase.toLowerCase())
+    ) {
+      return phrase;
+    }
+  }
+  return null;
+}
+
+const NAMED_CONCEPT_TITLE_BOOST = 2.0;
+const NAMED_CONCEPT_BODY_BOOST = 1.3;
+
 export function registerSmartSearchFunction(
   sdk: ISdk,
   kv: StateKV,
@@ -167,30 +221,72 @@ export function registerSmartSearchFunction(
       const lessonLimit = Math.min(limit, 10);
       const includeLessons = data.includeLessons !== false;
 
-      // Over-fetch when filtering. Hybrid search can't filter on
-      // agentId (BM25/vector indexes don't carry it), so we ask the
-      // searcher for more hits than we need and trim post-filter. 3×
-      // is a defensible middle ground: enough headroom for a small
-      // workload, capped at 300 so a 100-limit request never asks for
-      // thousands of hits.
-      const overFetchLimit = filterAgentId
-        ? Math.min(limit * 3, 300)
-        : limit;
-
-      const [hybridResults, lessons] = await Promise.all([
-        searchFn(data.query, overFetchLimit),
+      // Run observation hybrid-search and lesson recall in parallel so the
+      // extra lesson lookup adds no wallclock when the underlying calls
+      // can overlap. Lesson recall is best-effort: if mem::lesson-recall
+      // fails or returns unexpected shape, log + fall back to empty.
+      // Over-fetch when EITHER a named-concept query (post-rank boost
+      // needs material when BM25 mis-ranks the exact match under
+      // question-scaffolding noise) OR an agentId filter is active
+      // (hybrid search can't filter on agentId — BM25/vector indexes
+      // don't carry it — so we trim post-filter). 3×, capped at 300.
+      const namedConcept = extractNamedConcept(data.query);
+      const searchLimit =
+        namedConcept || filterAgentId ? Math.min(limit * 3, 300) : limit;
+      const [rawHybridResults, rawLessons] = await Promise.all([
+        searchFn(data.query, searchLimit),
         includeLessons
-          ? recallLessons(sdk, data.query, lessonLimit, data.project)
+          ? recallLessons(sdk, data.query, lessonLimit, data.project, namedConcept ?? undefined)
           : Promise.resolve([]),
       ]);
 
-      const filteredHybrid = filterAgentId
-        ? hybridResults
-            .filter((r) => r.observation.agentId === filterAgentId)
-            .slice(0, limit)
-        : hybridResults.slice(0, limit);
+      // Filter by agentId first (hybrid indexes can't), then apply the
+      // named-concept boost on the surviving set, then trim to limit.
+      let workingHybrid = filterAgentId
+        ? rawHybridResults.filter((r) => r.observation.agentId === filterAgentId)
+        : rawHybridResults;
+      let lessons = rawLessons;
+      if (namedConcept) {
+        const phrase = namedConcept.toLowerCase();
+        const boostHybrid = (r: HybridSearchResult): HybridSearchResult => {
+          const title = (r.observation.title || "").toLowerCase();
+          const narrative = (r.observation.narrative || "").toLowerCase();
+          // Multiplicative: title AND narrative both match → 2.0 × 1.3 = 2.6×.
+          // CodeRabbit caught the prior else-if capping dual matches at 2.0×.
+          let mult = 1;
+          if (title.includes(phrase)) mult *= NAMED_CONCEPT_TITLE_BOOST;
+          if (narrative.includes(phrase)) mult *= NAMED_CONCEPT_BODY_BOOST;
+          return mult === 1 ? r : { ...r, combinedScore: r.combinedScore * mult };
+        };
+        workingHybrid = workingHybrid
+          .map(boostHybrid)
+          .sort((a, b) => b.combinedScore - a.combinedScore);
+        // Use boostMatched (set by recallLessons against the full
+        // pre-truncation content) instead of re-scanning the 240-char
+        // preview here. Falls back to scanning preview if boostMatched
+        // is absent (recallLessons called without boostPhrase).
+        lessons = rawLessons
+          .map((l) => {
+            const matched =
+              l.boostMatched === true ||
+              (typeof l.content === "string" && l.content.toLowerCase().includes(phrase));
+            if (!matched) return l;
+            return { ...l, score: (l.score ?? 0) * NAMED_CONCEPT_TITLE_BOOST };
+          })
+          .sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
+        logger.info("Smart search named-concept boost applied", {
+          query: data.query,
+          concept: namedConcept,
+          boostedHybrid: workingHybrid.filter((r) => {
+            const t = (r.observation.title || "").toLowerCase();
+            const n = (r.observation.narrative || "").toLowerCase();
+            return t.includes(phrase) || n.includes(phrase);
+          }).length,
+        });
+      }
+      const hybridResults = workingHybrid.slice(0, limit);
 
-      const compact: CompactSearchResult[] = filteredHybrid.map((r) => ({
+      const compact: CompactSearchResult[] = hybridResults.map((r) => ({
         obsId: r.observation.id,
         sessionId: r.sessionId,
         title: r.observation.title,
@@ -273,6 +369,7 @@ async function recallLessons(
   query: string,
   limit: number,
   project?: string,
+  boostPhrase?: string,
 ): Promise {
   try {
     const result = (await sdk.trigger({
@@ -280,18 +377,33 @@ async function recallLessons(
       payload: { query, limit, project },
     })) as { success?: boolean; lessons?: Array };
     if (!result?.success || !Array.isArray(result.lessons)) return [];
-    return result.lessons.map((l) => ({
-      lessonId: l.id,
-      content:
-        l.content.length > LESSON_CONTENT_PREVIEW_CHARS
-          ? l.content.slice(0, LESSON_CONTENT_PREVIEW_CHARS) + "…"
-          : l.content,
-      confidence: l.confidence,
-      score: l.score ?? l.confidence,
-      createdAt: l.createdAt,
-      project: l.project,
-      tags: l.tags ?? [],
-    }));
+    const phraseLower = boostPhrase?.toLowerCase();
+    return result.lessons.map((l) => {
+      // Normalise once so a lesson with missing content cannot throw on
+      // `.length` and discard the whole batch via the catch below.
+      const content = l.content ?? "";
+      // Decide boost match against the FULL pre-truncation content so a
+      // phrase that lives past the 240-char preview window can still
+      // signal relevance. CodeRabbit caught this on #571. The newline
+      // separator keeps a phrase from matching across the content/context
+      // boundary (a phrase from extractNamedConcept never contains one).
+      const boostMatched = phraseLower
+        ? `${content}\n${l.context ?? ""}`.toLowerCase().includes(phraseLower)
+        : false;
+      return {
+        lessonId: l.id,
+        content:
+          content.length > LESSON_CONTENT_PREVIEW_CHARS
+            ? content.slice(0, LESSON_CONTENT_PREVIEW_CHARS) + "…"
+            : content,
+        confidence: l.confidence,
+        score: l.score ?? l.confidence,
+        createdAt: l.createdAt,
+        project: l.project,
+        tags: l.tags ?? [],
+        boostMatched,
+      };
+    });
   } catch (err) {
     logger.warn("Smart search: mem::lesson-recall failed; returning empty lesson list", {
       error: err instanceof Error ? err.message : String(err),
diff --git a/src/types.ts b/src/types.ts
index 2110aa82..1764cba5 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -278,6 +278,12 @@ export interface CompactLessonResult {
   createdAt: string;
   project?: string;
   tags: string[];
+  /**
+   * Set by recallLessons when the FULL pre-truncation content (or the
+   * lesson context) matched the named-concept boost phrase, so
+   * smart-search need not rely on the truncated preview. See #571.
+   */
+  boostMatched?: boolean;
 }
 
 export interface TimelineEntry {
diff --git a/test/smart-search.test.ts b/test/smart-search.test.ts
index 9d0c94e0..62b6edfd 100644
--- a/test/smart-search.test.ts
+++ b/test/smart-search.test.ts
@@ -292,3 +292,111 @@ describe("Smart Search Function", () => {
     });
   });
 });
+
+import { extractNamedConcept } from "../src/functions/smart-search.js";
+
+describe("extractNamedConcept (v4-B)", () => {
+  it("matches 'who is X' / 'what is X' / 'what does X mean'", () => {
+    expect(extractNamedConcept("who is the careful generator?")).toBe("careful generator");
+    expect(extractNamedConcept("who's the careful generator?")).toBe("careful generator");
+    expect(extractNamedConcept("what is a circuit breaker")).toBe("circuit breaker");
+    expect(extractNamedConcept("what's the auth middleware?")).toBe("auth middleware");
+    expect(extractNamedConcept("what does eventual consistency mean?")).toBe("eventual consistency");
+  });
+  it("returns null for non-named-concept queries", () => {
+    expect(extractNamedConcept("fix the bug in observe.ts")).toBeNull();
+    expect(extractNamedConcept("recent decisions")).toBeNull();
+    expect(extractNamedConcept("")).toBeNull();
+  });
+  it("rejects degenerate phrases (pronoun, too short, or too long)", () => {
+    expect(extractNamedConcept("what is it?")).toBeNull();
+    expect(extractNamedConcept("what is this?")).toBeNull();
+    expect(extractNamedConcept("what is x")).toBeNull();
+    expect(extractNamedConcept(
+      "what is the eight token thing we discussed earlier on the call",
+    )).toBeNull(); // >6 tokens
+  });
+});
+
+describe("Smart Search named-concept boost (v4-B)", () => {
+  let sdk: ReturnType<typeof mockSdk>;
+  let kv: ReturnType<typeof mockKV>;
+  let searchResults: HybridSearchResult[];
+
+  beforeEach(async () => {
+    sdk = mockSdk();
+    kv = mockKV();
+
+    // Two observations: the one whose TITLE names the concept ("careful
+    // generator") starts with a LOWER bm25 score so we can prove the
+    // boost re-ranks it above the busier observation.
+    const obsNamed = makeObs({
+      id: "obs_named",
+      sessionId: "ses_1",
+      title: "Tier 2 — careful generator (Qwen3.6-35B-A3B-FP8)",
+      narrative: "Picked Qwen3.6 for the careful generator role on vast.",
+    });
+    const obsBusy = makeObs({
+      id: "obs_busy",
+      sessionId: "ses_1",
+      title: "Refactor the request handler — moved validation",
+      narrative: "Random unrelated session work.",
+    });
+
+    searchResults = [
+      // BM25 prefers the busier observation (more tokens), so without
+      // boost the named-concept obs ranks SECOND.
+      {
+        observation: obsBusy,
+        bm25Score: 0.9,
+        vectorScore: 0,
+        combinedScore: 0.9,
+        sessionId: "ses_1",
+      },
+      {
+        observation: obsNamed,
+        bm25Score: 0.6,
+        vectorScore: 0,
+        combinedScore: 0.6,
+        sessionId: "ses_1",
+      },
+    ];
+
+    const session: Session = {
+      id: "ses_1",
+      project: "p",
+      cwd: "/tmp",
+      startedAt: "2026-02-01T00:00:00Z",
+      status: "completed",
+      observationCount: 2,
+    };
+    await kv.set("mem:sessions", "ses_1", session);
+    await kv.set("mem:obs:ses_1", "obs_named", obsNamed);
+    await kv.set("mem:obs:ses_1", "obs_busy", obsBusy);
+
+    const searchFn = async (_query: string, _limit: number) => searchResults;
+    registerSmartSearchFunction(sdk as never, kv as never, searchFn);
+  });
+
+  it("named-concept query boosts the title-matching observation to rank #1", async () => {
+    const result = (await sdk.trigger("mem::smart-search", {
+      query: "who is the careful generator?",
+      includeLessons: false,
+    })) as { results: CompactSearchResult[] };
+    expect(result.results.length).toBe(2);
+    expect(result.results[0].obsId).toBe("obs_named"); // title-boosted above busier obs
+    expect(result.results[1].obsId).toBe("obs_busy");
+    // Title (2.0×) and narrative (1.3×) both name the concept, so the
+    // boosted score should be 0.6 × 2.6 = 1.56, comfortably above 1.0.
+    expect(result.results[0].score).toBeGreaterThan(1.0);
+  });
+
+  it("non-named-concept query preserves original ordering", async () => {
+    const result = (await sdk.trigger("mem::smart-search", {
+      query: "refactor request handler",
+      includeLessons: false,
+    })) as { results: CompactSearchResult[] };
+    expect(result.results[0].obsId).toBe("obs_busy"); // unchanged: bm25 0.9 > 0.6
+    expect(result.results[1].obsId).toBe("obs_named");
+  });
+});