From e41d18f9ac07d21ca4a7c22f7625c5460f283f2f Mon Sep 17 00:00:00 2001 From: Steve Zhang Date: Fri, 10 Apr 2026 01:54:14 -0400 Subject: [PATCH 1/2] Add RecursiveCharacterTextSplitter and multi-query retrieval - Chunk documents (1000 chars, 200 overlap) before embedding for better retrieval precision - Generate multiple query variants using LLM to improve recall - Retrieve docs for all queries in parallel and deduplicate results Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/src/ingestion_graph/graph.ts | 11 +++++++- backend/src/retrieval_graph/graph.ts | 38 ++++++++++++++++++++++++-- backend/src/retrieval_graph/prompts.ts | 14 +++++++++- 3 files changed, 58 insertions(+), 5 deletions(-) diff --git a/backend/src/ingestion_graph/graph.ts b/backend/src/ingestion_graph/graph.ts index 49b7de0..cdcf538 100644 --- a/backend/src/ingestion_graph/graph.ts +++ b/backend/src/ingestion_graph/graph.ts @@ -4,6 +4,7 @@ import { RunnableConfig } from '@langchain/core/runnables'; import { StateGraph, END, START } from '@langchain/langgraph'; +import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; import fs from 'fs/promises'; import { IndexStateAnnotation } from './state.js'; @@ -14,6 +15,11 @@ import { } from './configuration.js'; import { reduceDocs } from '../shared/state.js'; +const textSplitter = new RecursiveCharacterTextSplitter({ + chunkSize: 1000, + chunkOverlap: 200, +}); + async function ingestDocs( state: typeof IndexStateAnnotation.State, config?: RunnableConfig, @@ -37,8 +43,11 @@ async function ingestDocs( docs = reduceDocs([], docs); } + // Split documents into smaller chunks for better retrieval + const splitDocs = await textSplitter.splitDocuments(docs); + const retriever = await makeRetriever(config); - await retriever.addDocuments(docs); + await retriever.addDocuments(splitDocs); return { docs: 'delete' }; } diff --git a/backend/src/retrieval_graph/graph.ts b/backend/src/retrieval_graph/graph.ts index e3098ce..14c7359 100644 --- a/backend/src/retrieval_graph/graph.ts +++ b/backend/src/retrieval_graph/graph.ts @@ -4,7 +4,11 @@ import { makeRetriever } from '../shared/retrieval.js'; import { formatDocs } from './utils.js'; import { HumanMessage } from '@langchain/core/messages'; import { z } from 'zod'; -import { RESPONSE_SYSTEM_PROMPT, ROUTER_SYSTEM_PROMPT } from './prompts.js'; +import { + RESPONSE_SYSTEM_PROMPT, + ROUTER_SYSTEM_PROMPT, + MULTI_QUERY_PROMPT, +} from './prompts.js'; import { RunnableConfig } from '@langchain/core/runnables'; import { AgentConfigurationAnnotation, @@ -74,10 +78,38 @@ async function retrieveDocuments( state: typeof AgentStateAnnotation.State, config: RunnableConfig, ): Promise { + const configuration = ensureAgentConfiguration(config); + const model = await loadChatModel(configuration.queryModel); const retriever = await makeRetriever(config); - const response = await retriever.invoke(state.query); - return { documents: response }; + // Generate multiple query variants for better retrieval coverage + const formattedPrompt = await MULTI_QUERY_PROMPT.invoke({ + query: state.query, + }); + const queryResponse = await model.invoke(formattedPrompt.toString()); + const queries = [ + state.query, + ...String(queryResponse.content) + .split('\n') + .map((q) => q.trim()) + .filter((q) => q.length > 0), + ]; + + // Retrieve documents for all queries in parallel + const allResults = await Promise.all( + queries.map((q) => retriever.invoke(q)), + ); + + // Deduplicate by page content + const seen = new Set(); + const uniqueDocs = allResults.flat().filter((doc) => { + const key = doc.pageContent; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); + + return { documents: uniqueDocs }; } async function generateResponse( diff --git a/backend/src/retrieval_graph/prompts.ts b/backend/src/retrieval_graph/prompts.ts index 7fee92e..8481b28 100644 --- a/backend/src/retrieval_graph/prompts.ts +++ b/backend/src/retrieval_graph/prompts.ts @@ -23,4 +23,16 @@ const RESPONSE_SYSTEM_PROMPT = ChatPromptTemplate.fromMessages([ ], ]); -export { ROUTER_SYSTEM_PROMPT, RESPONSE_SYSTEM_PROMPT }; +const MULTI_QUERY_PROMPT = ChatPromptTemplate.fromMessages([ + [ + 'system', + `You are an AI assistant that generates multiple search queries to improve document retrieval. +Given a user question, generate 3 different versions of the question that capture different aspects or phrasings. +Each query should approach the question from a different angle to maximize the chance of finding relevant documents. + +Return ONLY the 3 queries, one per line, with no numbering or prefixes.`, + ], + ['human', '{query}'], +]); + +export { ROUTER_SYSTEM_PROMPT, RESPONSE_SYSTEM_PROMPT, MULTI_QUERY_PROMPT }; From b55ea63ba20e8da2c604048d353e8e0be7aafee7 Mon Sep 17 00:00:00 2001 From: Steve Zhang Date: Fri, 10 Apr 2026 02:14:51 -0400 Subject: [PATCH 2/2] Cap multi-query results and add expandable source citations - Limit retrieved docs to k*2 to reduce noise in sources - Click source cards to expand and view the actual chunk text Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/src/retrieval_graph/graph.ts | 6 ++++-- frontend/components/chat-message.tsx | 22 ++++++++++++++++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/backend/src/retrieval_graph/graph.ts b/backend/src/retrieval_graph/graph.ts index 14c7359..7a3579f 100644 --- a/backend/src/retrieval_graph/graph.ts +++ b/backend/src/retrieval_graph/graph.ts @@ -100,7 +100,7 @@ async function retrieveDocuments( queries.map((q) => retriever.invoke(q)), ); - // Deduplicate by page content + // Deduplicate by page content, preserving order (original query first) const seen = new Set(); const uniqueDocs = allResults.flat().filter((doc) => { const key = doc.pageContent; @@ -109,7 +109,9 @@ async function retrieveDocuments( return true; }); - return { documents: uniqueDocs }; + // Cap results — original query docs are first, so most relevant are kept + const maxDocs = configuration.k * 2; + return { documents: uniqueDocs.slice(0, maxDocs) }; } async function generateResponse( diff --git a/frontend/components/chat-message.tsx b/frontend/components/chat-message.tsx index 97883c8..dce739d 100644 --- a/frontend/components/chat-message.tsx +++ b/frontend/components/chat-message.tsx @@ -21,6 +21,7 @@ interface ChatMessageProps { export function ChatMessage({ message }: ChatMessageProps) { const isUser = message.role === 'user'; const [copied, setCopied] = useState(false); + const [expandedSource, setExpandedSource] = useState(null); const isLoading = message.role === 'assistant' && message.content === ''; const handleCopy = async () => { @@ -78,17 +79,30 @@ export function ChatMessage({ message }: ChatMessageProps) { {message.sources?.map((source, index) => ( + setExpandedSource( + expandedSource === index ? null : index, + ) + } >

- {source.metadata?.source || - source.metadata?.filename || - 'N/A'} + {source.metadata?.filename || + (source.metadata?.source + ? source.metadata.source + .split(/[/\\]/) + .pop() + : 'N/A')}

Page {source.metadata?.loc?.pageNumber || 'N/A'}

+ {expandedSource === index && ( +

+ {source.pageContent || 'No content available'} +

+ )}
))}