Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,9 @@ VERCEL_CHANGES_SUMMARY.md
VERCEL_DEPLOYMENT_ASSESSMENT.md
VERCEL_MIGRATION_GUIDE.md
node_modules/

# Build artifacts
**/build/
**/dist/
**/public/*.es.js
venv2/
2 changes: 2 additions & 0 deletions TEST_FAILURES_ANALYSIS.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,5 @@ But the tests expect formatted output with bullet points:
3. **Consider**: Some tests might need to navigate to specific pages first before checking for widgets




36 changes: 34 additions & 2 deletions report_analyst/core/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,25 @@ async def process_document(
else:
logger.warning("No EVIDENCE field found in result")

# 5. Save complete analysis
# 5. Add chunks to result before saving
# Prepare chunks with all metadata for saving
result_chunks = []
for i, chunk in enumerate(similar_chunks):
chunk_data = {
"text": chunk.get("text", ""),
"chunk_order": i,
"similarity_score": chunk.get("similarity_score", chunk.get("score", 0.0)),
"llm_score": chunk.get("llm_score"),
"is_evidence": chunk.get("is_evidence", False),
"evidence_order": chunk.get("evidence_order"),
"metadata": chunk.get("metadata", {}),
}
result_chunks.append(chunk_data)

result["chunks"] = result_chunks
logger.info(f"[ANALYSIS] Added {len(result_chunks)} chunks to result for saving")

# 6. Save complete analysis
logger.info(
f"[ANALYSIS] Saving analysis result for question {question_id}"
)
Expand All @@ -839,7 +857,7 @@ async def process_document(
"question_set": self.question_set,
}

# Save analysis result
# Save analysis result (includes chunks)
self.cache_manager.save_analysis(
file_path=file_path,
question_id=question_id,
Expand Down Expand Up @@ -1067,6 +1085,15 @@ async def _analyze_chunks(

# Get LLM response
try:
if self.llm is None:
logger.error("LLM not initialized - cannot analyze chunks")
return {
"ANSWER": "Error: LLM not initialized. Please check your API keys and configuration.",
"SCORE": 0,
"EVIDENCE": [],
"GAPS": ["LLM service unavailable"],
"SOURCES": [],
}
response = await self.llm.achat(messages)
response_text = (
response.message.content
Expand Down Expand Up @@ -1417,6 +1444,11 @@ async def _get_similar_chunks(
try:
logger.info(f"Getting similar chunks for query: {query_text[:50]}...")

# Check if embeddings are available
if self.embeddings is None:
logger.error("Embeddings not initialized - cannot get similar chunks")
return []

# Get embedding for the query
query_embedding = self.embeddings.get_text_embedding(query_text)

Expand Down
292 changes: 284 additions & 8 deletions report_analyst/core/cache_manager.py

Large diffs are not rendered by default.

15 changes: 11 additions & 4 deletions report_analyst/core/dataframe_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,20 +62,27 @@ def create_analysis_dataframes(
f"Processing question {question_id} with keys: {list(result.keys())}"
)

# Create analysis row
# Create analysis row - ensure score is a number
score = result.get("SCORE", 0)
try:
score = float(score) if score is not None else 0
except (ValueError, TypeError):
score = 0

analysis_row = {
"Question ID": question_id,
"Analysis": result.get("ANSWER", ""),
"Score": float(result.get("SCORE", 0)),
"Score": score,
"Key Evidence": format_list_field(result.get("EVIDENCE", [])),
"Gaps": format_list_field(result.get("GAPS", [])),
"Sources": format_list_field(result.get("SOURCES", [])),
}
analysis_rows.append(analysis_row)
logger.info(f"Added analysis row for question {question_id}")

# Process chunks - use exactly what's in the database
chunks = data.get("chunks", [])
# Process chunks - check both result and data for chunks
# Chunks can be in result (if added during analysis) or in data (if from database)
chunks = result.get("chunks", data.get("chunks", []))
logger.info(
f"Processing {len(chunks)} chunks for question {question_id}"
)
Expand Down
Loading
Loading