From ea5a7b0aa9c8797bf0d2e19092486f98692be161 Mon Sep 17 00:00:00 2001
From: axect <axect.tg@proton.me>
Date: Tue, 7 Apr 2026 15:11:29 +0800
Subject: [PATCH 1/5] feat: overhaul paper review with expert-reviewer prompts
 and publication-quality output

- Add 3 new review sections: Reproducibility Assessment, Impact & Significance, Reading Guide
- Rewrite all 12 existing prompts with a senior-reviewer persona, structured evaluation criteria,
  evidence requirements, and anti-patterns
- Richer JSON schemas: severity levels on weaknesses, novelty levels on contributions,
  a formal recommendation (strong_accept through reject), confidence levels, structured assumptions
- Publication-quality Markdown rendering: metadata table, table of contents, severity badges,
  inline tags, structured sub-sections, blockquote highlights
- Backward compatible with existing cached review data
---
 src/arxiv_explorer/cli/review.py              |   9 +-
 src/arxiv_explorer/core/models.py             |   3 +
 src/arxiv_explorer/services/review_service.py | 858 +++++++++++++++---
 tests/test_review_service.py                  |  44 +-
 4 files changed, 766 insertions(+), 148 deletions(-)

diff --git a/src/arxiv_explorer/cli/review.py b/src/arxiv_explorer/cli/review.py
index 2d4fa49..985ea5e 100644
--- a/src/arxiv_explorer/cli/review.py
+++ b/src/arxiv_explorer/cli/review.py
@@ -26,13 +26,16 @@
     ReviewSectionType.SECTION_SUMMARIES: "Section Summaries",
     ReviewSectionType.METHODOLOGY: "Methodology Analysis",
     ReviewSectionType.MATH_FORMULATIONS: "Math Formulations",
-    ReviewSectionType.FIGURES: "Figure Descriptions",
-    ReviewSectionType.TABLES: "Table Descriptions",
+    ReviewSectionType.FIGURES: "Figure Analysis",
+    ReviewSectionType.TABLES: "Table Analysis",
     ReviewSectionType.EXPERIMENTAL_RESULTS: "Experimental Results",
+    ReviewSectionType.REPRODUCIBILITY: "Reproducibility Assessment",
     ReviewSectionType.STRENGTHS_WEAKNESSES: "Strengths & Weaknesses",
+    ReviewSectionType.IMPACT_SIGNIFICANCE: "Impact & Significance",
     ReviewSectionType.RELATED_WORK: "Related Work",
     ReviewSectionType.GLOSSARY: "Glossary",
-    ReviewSectionType.QUESTIONS: "Questions",
+    ReviewSectionType.QUESTIONS: "Questions for Authors",
+    ReviewSectionType.READING_GUIDE: "Reading Guide",
 }
 
 
diff --git a/src/arxiv_explorer/core/models.py b/src/arxiv_explorer/core/models.py
index 766910b..839fc5e 100644
--- a/src/arxiv_explorer/core/models.py
+++ b/src/arxiv_explorer/core/models.py
@@ -60,10 +60,13 @@ class ReviewSectionType(str, Enum):
     FIGURES = "figures"
     TABLES = "tables"
     EXPERIMENTAL_RESULTS = "experimental_results"
+    REPRODUCIBILITY = "reproducibility"
     STRENGTHS_WEAKNESSES = "strengths_weaknesses"
+    IMPACT_SIGNIFICANCE = "impact_significance"
     RELATED_WORK = "related_work"
     GLOSSARY = "glossary"
     QUESTIONS = "questions"
+    READING_GUIDE = "reading_guide"
 
 
 @dataclass
diff --git a/src/arxiv_explorer/services/review_service.py b/src/arxiv_explorer/services/review_service.py
index d5028fb..0449955 100644
--- a/src/arxiv_explorer/services/review_service.py
+++ b/src/arxiv_explorer/services/review_service.py
@@ -37,12 +37,25 @@ class PaperReviewService:
         (ReviewSectionType.FIGURES, True),
         (ReviewSectionType.TABLES, True),
         (ReviewSectionType.EXPERIMENTAL_RESULTS, True),
+        (ReviewSectionType.REPRODUCIBILITY, True),
         (ReviewSectionType.STRENGTHS_WEAKNESSES, False),
+        (ReviewSectionType.IMPACT_SIGNIFICANCE, False),
         (ReviewSectionType.RELATED_WORK, True),
         (ReviewSectionType.GLOSSARY, False),
         (ReviewSectionType.QUESTIONS, False),
+        (ReviewSectionType.READING_GUIDE, False),
     ]
 
+    # Shared reviewer persona prefix for all prompts
+    _REVIEWER_PERSONA = (
+        "You are a senior reviewer for a top-tier venue (e.g., NeurIPS, ICML, Nature, JMLR). "
+        "You have deep expertise in the paper's domain and extensive experience evaluating "
+        "research for novelty, rigor, clarity, and significance. "
+        "Your analysis must be evidence-grounded: cite specific sections, equations, figures, "
+        "or tables from the paper to support every claim. "
+        "Avoid vague praise or criticism — be precise and constructive.\n\n"
+    )
+
     def generate_review(
         self,
         paper: Paper,
@@ -90,6 +103,7 @@ def generate_review(
                     ReviewSectionType.FIGURES,
                     ReviewSectionType.TABLES,
                     ReviewSectionType.MATH_FORMULATIONS,
+                    ReviewSectionType.REPRODUCIBILITY,
                 ):
                     empty = self._empty_section_data(section_type)
                     sections_data[section_type] = empty
@@ -165,70 +179,169 @@ def delete_review(self, arxiv_id: str) -> bool:
             conn.commit()
             return cursor.rowcount > 0
 
+    # ── Severity / Rating Formatting Helpers ────────────────────────────
+
+    _SEVERITY_BADGES: dict[str, str] = {
+        "critical": "**[CRITICAL]**",
+        "major": "**[MAJOR]**",
+        "moderate": "[MODERATE]",
+        "minor": "[minor]",
+    }
+
+    _RECOMMENDATION_LABELS: dict[str, str] = {
+        "strong_accept": "Strong Accept",
+        "accept": "Accept",
+        "weak_accept": "Weak Accept",
+        "borderline": "Borderline",
+        "weak_reject": "Weak Reject",
+        "reject": "Reject",
+    }
+
+    _SIGNIFICANCE_LABELS: dict[str, str] = {
+        "transformative": "Transformative",
+        "significant": "Significant",
+        "solid_contribution": "Solid Contribution",
+        "incremental": "Incremental",
+        "limited": "Limited",
+    }
+
     def render_markdown(
         self,
         review: PaperReview,
         language: Language = Language.EN,
     ) -> str:
-        """Render a PaperReview into final Markdown string."""
+        """Render a PaperReview into publication-quality Markdown."""
         parts: list[str] = []
+        sw = review.sections.get(ReviewSectionType.STRENGTHS_WEAKNESSES, {})
+        imp = review.sections.get(ReviewSectionType.IMPACT_SIGNIFICANCE, {})
 
-        # --- Header ---
+        # ═══════════════════════════════════════════════════════════════
+        # HEADER & METADATA
+        # ═══════════════════════════════════════════════════════════════
         parts.append(f"# {review.title}\n")
         author_str = ", ".join(review.authors[:10])
         if len(review.authors) > 10:
             author_str += f" (+{len(review.authors) - 10} more)"
-        parts.append(f"**Authors:** {author_str}  ")
+
+        source_label = (
+            "Full text" if review.source_type == "full_text" else "Abstract only"
+        )
+
+        # Quick Reference Card
+        parts.append("| | |")
+        parts.append("|:--|:--|")
+        parts.append(f"| **Authors** | {author_str} |")
         parts.append(
-            f"**arXiv ID:** [{review.arxiv_id}](https://arxiv.org/abs/{review.arxiv_id})  "
+            f"| **arXiv** | [{review.arxiv_id}](https://arxiv.org/abs/{review.arxiv_id}) |"
         )
-        parts.append(f"**Categories:** {', '.join(review.categories)}  ")
-        parts.append(f"**Published:** {review.published.strftime('%Y-%m-%d')}  ")
+        parts.append(f"| **Categories** | {', '.join(review.categories)} |")
+        parts.append(f"| **Published** | {review.published.strftime('%Y-%m-%d')} |")
         if review.pdf_url:
-            parts.append(f"**PDF:** [{review.pdf_url}]({review.pdf_url})  ")
-        source_label = (
-            "Full text analysis" if review.source_type == "full_text" else "Abstract-only analysis"
-        )
-        parts.append(f"**Source:** {source_label}")
+            parts.append(f"| **PDF** | [{review.pdf_url}]({review.pdf_url}) |")
+        parts.append(f"| **Analysis Source** | {source_label} |")
+
+        # Inject recommendation & significance into the card if available
+        rec = sw.get("recommendation", "")
+        if rec:
+            rec_label = self._RECOMMENDATION_LABELS.get(rec, rec)
+            parts.append(f"| **Recommendation** | **{rec_label}** |")
+        sig = imp.get("significance_rating", "")
+        if sig:
+            sig_label = self._SIGNIFICANCE_LABELS.get(sig, sig)
+            parts.append(f"| **Significance** | {sig_label} |")
+        confidence = sw.get("confidence", "")
+        if confidence:
+            parts.append(f"| **Reviewer Confidence** | {confidence} |")
         parts.append("")
 
-        # --- Executive Summary ---
+        # ═══════════════════════════════════════════════════════════════
+        # TABLE OF CONTENTS
+        # ═══════════════════════════════════════════════════════════════
+        toc_sections = [
+            (ReviewSectionType.EXECUTIVE_SUMMARY, "Executive Summary"),
+            (ReviewSectionType.KEY_CONTRIBUTIONS, "Key Contributions"),
+            (ReviewSectionType.SECTION_SUMMARIES, "Section-by-Section Summary"),
+            (ReviewSectionType.METHODOLOGY, "Methodology Analysis"),
+            (ReviewSectionType.MATH_FORMULATIONS, "Mathematical Formulations"),
+            (ReviewSectionType.FIGURES, "Figure Analysis"),
+            (ReviewSectionType.TABLES, "Table Analysis"),
+            (ReviewSectionType.EXPERIMENTAL_RESULTS, "Experimental Results"),
+            (ReviewSectionType.REPRODUCIBILITY, "Reproducibility Assessment"),
+            (ReviewSectionType.STRENGTHS_WEAKNESSES, "Strengths & Weaknesses"),
+            (ReviewSectionType.IMPACT_SIGNIFICANCE, "Impact & Significance"),
+            (ReviewSectionType.RELATED_WORK, "Related Work"),
+            (ReviewSectionType.GLOSSARY, "Glossary"),
+            (ReviewSectionType.QUESTIONS, "Questions for Authors"),
+            (ReviewSectionType.READING_GUIDE, "Reading Guide"),
+        ]
+        toc_items = []
+        for st, label in toc_sections:
+            data = review.sections.get(st, {})
+            if data:
+                anchor = label.lower().replace(" ", "-").replace("&", "").replace("--", "-")
+                toc_items.append(f"[{label}](#{anchor})")
+        if toc_items:
+            parts.append(f"**Contents:** {' | '.join(toc_items)}\n")
+
+        parts.append("---\n")
+
+        # ═══════════════════════════════════════════════════════════════
+        # EXECUTIVE SUMMARY
+        # ═══════════════════════════════════════════════════════════════
         es = review.sections.get(ReviewSectionType.EXECUTIVE_SUMMARY, {})
         if es:
             parts.append("## Executive Summary\n")
             if es.get("tldr"):
-                parts.append(f"**TL;DR:** {es['tldr']}\n")
+                parts.append(f"> {es['tldr']}\n")
             if es.get("research_question"):
                 parts.append(f"**Research Question:** {es['research_question']}\n")
             if es.get("approach_summary"):
                 parts.append(f"**Approach:** {es['approach_summary']}\n")
             if es.get("main_result"):
                 parts.append(f"**Main Result:** {es['main_result']}\n")
-
-        # --- Key Contributions ---
+            if es.get("novelty_claim"):
+                parts.append(f"**Novelty:** {es['novelty_claim']}\n")
+            if es.get("one_sentence_verdict"):
+                parts.append(f"**Verdict:** *{es['one_sentence_verdict']}*\n")
+            if es.get("target_audience"):
+                parts.append(f"**Target Audience:** {es['target_audience']}\n")
+
+        # ═══════════════════════════════════════════════════════════════
+        # KEY CONTRIBUTIONS
+        # ═══════════════════════════════════════════════════════════════
         kc = review.sections.get(ReviewSectionType.KEY_CONTRIBUTIONS, {})
         if kc and kc.get("contributions"):
             parts.append("## Key Contributions\n")
-            for c in kc["contributions"]:
+            for i, c in enumerate(kc["contributions"], 1):
                 ctype = c.get("type", "general")
-                parts.append(f"- **[{ctype}]** {c.get('contribution', '')}")
+                novelty = c.get("novelty", "")
+                novelty_tag = f" `{novelty}`" if novelty else ""
+                parts.append(f"**{i}. [{ctype}]{novelty_tag}** {c.get('contribution', '')}\n")
                 if c.get("significance"):
-                    parts.append(f"  - *Significance:* {c['significance']}")
-            parts.append("")
+                    parts.append(f"- *Significance:* {c['significance']}")
+                if c.get("evidence_strength"):
+                    parts.append(f"- *Evidence:* {c['evidence_strength']}")
+                parts.append("")
 
-        # --- Section-by-Section Summary ---
+        # ═══════════════════════════════════════════════════════════════
+        # SECTION-BY-SECTION SUMMARY
+        # ═══════════════════════════════════════════════════════════════
         ss = review.sections.get(ReviewSectionType.SECTION_SUMMARIES, {})
         if ss and ss.get("sections"):
             parts.append("## Section-by-Section Summary\n")
             for sec in ss["sections"]:
-                parts.append(f"### {sec.get('heading', 'Unknown Section')}\n")
+                clarity = sec.get("clarity_assessment", "")
+                clarity_tag = f" `{clarity}`" if clarity else ""
+                parts.append(f"### {sec.get('heading', 'Unknown Section')}{clarity_tag}\n")
                 parts.append(f"{sec.get('summary', '')}\n")
                 if sec.get("key_points"):
                     for kp in sec["key_points"]:
                         parts.append(f"- {kp}")
                     parts.append("")
 
-        # --- Methodology Analysis ---
+        # ═══════════════════════════════════════════════════════════════
+        # METHODOLOGY ANALYSIS
+        # ═══════════════════════════════════════════════════════════════
         meth = review.sections.get(ReviewSectionType.METHODOLOGY, {})
         if meth:
             parts.append("## Methodology Analysis\n")
@@ -236,136 +349,333 @@ def render_markdown(
                 parts.append(f"{meth['overview']}\n")
             if meth.get("steps"):
                 for step in meth["steps"]:
-                    parts.append(f"### {step.get('step_name', 'Step')}\n")
+                    novelty = step.get("novelty", "standard")
+                    novelty_badge = f" `{novelty}`" if novelty != "standard" else ""
+                    parts.append(f"### {step.get('step_name', 'Step')}{novelty_badge}\n")
                     parts.append(f"{step.get('description', '')}\n")
-                    novelty = step.get("novelty", "")
-                    if novelty and novelty != "standard":
-                        parts.append(f"*Novelty:* {novelty}\n")
+                    if step.get("justification"):
+                        parts.append(f"*Justification:* {step['justification']}\n")
             if meth.get("assumptions"):
-                parts.append("**Assumptions:**\n")
+                parts.append("### Assumptions\n")
+                # Support both old (list of strings) and new (list of dicts) format
                 for a in meth["assumptions"]:
-                    parts.append(f"- {a}")
+                    if isinstance(a, dict):
+                        parts.append(f"- **{a.get('assumption', '')}**")
+                        if a.get("validity"):
+                            parts.append(f"  - *Validity:* {a['validity']}")
+                        if a.get("impact_if_violated"):
+                            parts.append(f"  - *If violated:* {a['impact_if_violated']}")
+                    else:
+                        parts.append(f"- {a}")
+                parts.append("")
+            if meth.get("limitations"):
+                parts.append("### Methodological Limitations\n")
+                for lim in meth["limitations"]:
+                    parts.append(f"- {lim}")
                 parts.append("")
             if meth.get("complexity_notes"):
                 parts.append(f"**Complexity:** {meth['complexity_notes']}\n")
 
-        # --- Figure Descriptions ---
+        # ═══════════════════════════════════════════════════════════════
+        # MATHEMATICAL FORMULATIONS
+        # ═══════════════════════════════════════════════════════════════
+        math_data = review.sections.get(ReviewSectionType.MATH_FORMULATIONS, {})
+        if math_data and math_data.get("formulations"):
+            parts.append("## Mathematical Formulations\n")
+            for f in math_data["formulations"]:
+                role = f.get("role", "")
+                role_tag = f" `{role}`" if role else ""
+                parts.append(f"### {f.get('equation_label', 'Equation')}{role_tag}\n")
+                if f.get("latex"):
+                    parts.append(f"$$\n{f['latex']}\n$$\n")
+                if f.get("plain_language"):
+                    parts.append(f"{f['plain_language']}\n")
+                if f.get("variables"):
+                    parts.append(f"*Variables:* {f['variables']}\n")
+                if f.get("correctness_note") and f["correctness_note"] != "appears sound":
+                    parts.append(f"> **Note:** {f['correctness_note']}\n")
+
+        # ═══════════════════════════════════════════════════════════════
+        # FIGURE ANALYSIS
+        # ═══════════════════════════════════════════════════════════════
         figs = review.sections.get(ReviewSectionType.FIGURES, {})
         if figs and figs.get("figures"):
-            parts.append("## Figure Descriptions\n")
+            parts.append("## Figure Analysis\n")
             for fig in figs["figures"]:
-                parts.append(f"### Figure {fig.get('figure_id', '?')}\n")
+                sig = fig.get("significance", "")
+                sig_tag = f" `{sig}`" if sig else ""
+                parts.append(f"### Figure {fig.get('figure_id', '?')}{sig_tag}\n")
                 parts.append(f"{fig.get('description', '')}\n")
-                if fig.get("significance"):
-                    parts.append(f"*Significance:* {fig['significance']}\n")
+                if fig.get("claim_supported"):
+                    parts.append(f"- *Supports:* {fig['claim_supported']}")
+                if fig.get("caption_quality"):
+                    parts.append(f"- *Caption:* {fig['caption_quality']}")
+                if fig.get("presentation_issues") and fig["presentation_issues"] != "none":
+                    parts.append(f"- *Issues:* {fig['presentation_issues']}")
+                parts.append("")
 
-        # --- Table Descriptions ---
+        # ═══════════════════════════════════════════════════════════════
+        # TABLE ANALYSIS
+        # ═══════════════════════════════════════════════════════════════
         tbls = review.sections.get(ReviewSectionType.TABLES, {})
         if tbls and tbls.get("tables"):
-            parts.append("## Table Descriptions\n")
+            parts.append("## Table Analysis\n")
             for tbl in tbls["tables"]:
                 cap = tbl.get("caption", "")
-                parts.append(f"### Table {tbl.get('table_id', '?')}: {cap}\n")
+                ttype = tbl.get("table_type", "")
+                type_tag = f" `{ttype}`" if ttype else ""
+                parts.append(f"### Table {tbl.get('table_id', '?')}: {cap}{type_tag}\n")
                 parts.append(f"{tbl.get('description', '')}\n")
                 if tbl.get("key_findings"):
-                    parts.append(f"*Key findings:* {tbl['key_findings']}\n")
-
-        # --- Mathematical Formulations ---
-        math = review.sections.get(ReviewSectionType.MATH_FORMULATIONS, {})
-        if math and math.get("formulations"):
-            parts.append("## Mathematical Formulations\n")
-            for f in math["formulations"]:
-                parts.append(f"### {f.get('equation_label', 'Equation')}\n")
-                if f.get("latex"):
-                    parts.append(f"$$\n{f['latex']}\n$$\n")
-                if f.get("plain_language"):
-                    parts.append(f"**Plain language:** {f['plain_language']}\n")
-                if f.get("role"):
-                    parts.append(f"*Role:* {f['role']}\n")
+                    parts.append(f"**Key findings:** {tbl['key_findings']}\n")
+                if tbl.get("issues") and tbl["issues"] != "none":
+                    parts.append(f"> **Issues:** {tbl['issues']}\n")
 
-        # --- Experimental Results ---
+        # ═══════════════════════════════════════════════════════════════
+        # EXPERIMENTAL RESULTS
+        # ═══════════════════════════════════════════════════════════════
         exp = review.sections.get(ReviewSectionType.EXPERIMENTAL_RESULTS, {})
         if exp:
             parts.append("## Experimental Results\n")
+            # Setup summary table
+            setup_rows = []
             if exp.get("datasets"):
-                parts.append(f"**Datasets:** {', '.join(exp['datasets'])}\n")
+                setup_rows.append(("Datasets", ", ".join(exp["datasets"])))
             if exp.get("baselines"):
-                parts.append(f"**Baselines:** {', '.join(exp['baselines'])}\n")
+                setup_rows.append(("Baselines", ", ".join(exp["baselines"])))
             if exp.get("metrics"):
-                parts.append(f"**Metrics:** {', '.join(exp['metrics'])}\n")
+                setup_rows.append(("Metrics", ", ".join(exp["metrics"])))
+            if setup_rows:
+                parts.append("| Aspect | Details |")
+                parts.append("|:-------|:--------|")
+                for label, val in setup_rows:
+                    parts.append(f"| {label} | {val} |")
+                parts.append("")
+
             if exp.get("main_results"):
-                parts.append(f"{exp['main_results']}\n")
+                parts.append(f"### Main Results\n\n{exp['main_results']}\n")
+            if exp.get("statistical_rigor"):
+                parts.append(f"### Statistical Rigor\n\n{exp['statistical_rigor']}\n")
             if exp.get("ablation_studies"):
-                parts.append(f"**Ablation Studies:** {exp['ablation_studies']}\n")
+                parts.append(f"### Ablation Studies\n\n{exp['ablation_studies']}\n")
+            if exp.get("missing_experiments"):
+                parts.append("### Missing Experiments\n")
+                for me in exp["missing_experiments"]:
+                    parts.append(f"- {me}")
+                parts.append("")
             if exp.get("notable_findings"):
-                parts.append("**Notable Findings:**\n")
+                parts.append("### Notable Findings\n")
                 for nf in exp["notable_findings"]:
                     parts.append(f"- {nf}")
                 parts.append("")
 
-        # --- Strengths and Weaknesses ---
-        sw = review.sections.get(ReviewSectionType.STRENGTHS_WEAKNESSES, {})
+        # ═══════════════════════════════════════════════════════════════
+        # REPRODUCIBILITY ASSESSMENT
+        # ═══════════════════════════════════════════════════════════════
+        repro = review.sections.get(ReviewSectionType.REPRODUCIBILITY, {})
+        if repro and repro.get("reproducibility_score", "unknown") != "unknown":
+            parts.append("## Reproducibility Assessment\n")
+            score = repro.get("reproducibility_score", "unknown")
+            parts.append(f"**Overall Score: `{score}`**\n")
+
+            parts.append("| Dimension | Assessment |")
+            parts.append("|:----------|:-----------|")
+            dims = [
+                ("Code Availability", "code_availability"),
+                ("Data Availability", "data_availability"),
+                ("Methodology Clarity", "methodology_clarity"),
+                ("Hyperparameter Reporting", "hyperparameter_reporting"),
+                ("Compute Requirements", "computational_requirements"),
+                ("Variance Reporting", "variance_reporting"),
+            ]
+            for label, key in dims:
+                val = repro.get(key, "")
+                if val:
+                    parts.append(f"| {label} | {val} |")
+            parts.append("")
+
+            if repro.get("missing_details"):
+                parts.append("**Missing for Reproducibility:**\n")
+                for md in repro["missing_details"]:
+                    parts.append(f"- {md}")
+                parts.append("")
+
+        # ═══════════════════════════════════════════════════════════════
+        # STRENGTHS & WEAKNESSES
+        # ═══════════════════════════════════════════════════════════════
         if sw:
-            parts.append("## Strengths and Weaknesses\n")
+            parts.append("## Strengths & Weaknesses\n")
             if sw.get("strengths"):
                 parts.append("### Strengths\n")
                 for s in sw["strengths"]:
-                    parts.append(f"- **{s.get('point', '')}**")
+                    cat = s.get("category", "")
+                    sig_level = s.get("significance", "")
+                    tags = []
+                    if cat:
+                        tags.append(cat)
+                    if sig_level:
+                        tags.append(sig_level)
+                    tag_str = f" `{'|'.join(tags)}`" if tags else ""
+                    parts.append(f"- **{s.get('point', '')}**{tag_str}")
                     if s.get("evidence"):
                         parts.append(f"  - {s['evidence']}")
                 parts.append("")
             if sw.get("weaknesses"):
                 parts.append("### Weaknesses\n")
                 for w in sw["weaknesses"]:
-                    parts.append(f"- **{w.get('point', '')}**")
+                    severity = w.get("severity", "")
+                    badge = self._SEVERITY_BADGES.get(severity, "")
+                    cat = w.get("category", "")
+                    cat_tag = f" `{cat}`" if cat else ""
+                    parts.append(f"- {badge} **{w.get('point', '')}**{cat_tag}")
                     if w.get("evidence"):
                         parts.append(f"  - {w['evidence']}")
+                    if w.get("suggestion"):
+                        parts.append(f"  - *Suggestion:* {w['suggestion']}")
                 parts.append("")
             if sw.get("overall_assessment"):
-                parts.append(f"**Overall Assessment:** {sw['overall_assessment']}\n")
+                parts.append(f"> **Assessment:** {sw['overall_assessment']}\n")
+
+        # ═══════════════════════════════════════════════════════════════
+        # IMPACT & SIGNIFICANCE
+        # ═══════════════════════════════════════════════════════════════
+        if imp:
+            parts.append("## Impact & Significance\n")
+            sig_rating = imp.get("significance_rating", "")
+            if sig_rating:
+                label = self._SIGNIFICANCE_LABELS.get(sig_rating, sig_rating)
+                parts.append(f"**Rating: `{label}`**\n")
+            if imp.get("field_impact"):
+                parts.append(f"**Field Impact:** {imp['field_impact']}\n")
+            if imp.get("practical_applications"):
+                parts.append("**Practical Applications:**\n")
+                for pa in imp["practical_applications"]:
+                    parts.append(f"- {pa}")
+                parts.append("")
+            if imp.get("broader_impact"):
+                parts.append(f"**Broader Impact:** {imp['broader_impact']}\n")
+            if imp.get("limitations_of_impact"):
+                parts.append(f"**Limitations:** {imp['limitations_of_impact']}\n")
+            if imp.get("future_directions"):
+                parts.append("### Future Directions\n")
+                for fd in imp["future_directions"]:
+                    if isinstance(fd, dict):
+                        parts.append(f"- **{fd.get('direction', '')}**")
+                        if fd.get("potential"):
+                            parts.append(f"  - {fd['potential']}")
+                    else:
+                        parts.append(f"- {fd}")
+                parts.append("")
 
-        # --- Related Work ---
+        # ═══════════════════════════════════════════════════════════════
+        # RELATED WORK
+        # ═══════════════════════════════════════════════════════════════
         rw = review.sections.get(ReviewSectionType.RELATED_WORK, {})
         if rw:
-            parts.append("## Related Work Context\n")
+            parts.append("## Related Work\n")
             if rw.get("research_areas"):
-                parts.append(f"**Research Areas:** {', '.join(rw['research_areas'])}\n")
+                # Support both old (list of strings) and new (list of dicts) format
+                for area in rw["research_areas"]:
+                    if isinstance(area, dict):
+                        parts.append(f"**{area.get('area', '')}:** {area.get('description', '')}\n")
+                    else:
+                        parts.append(f"- {area}")
+                if isinstance(rw["research_areas"][0], str):
+                    parts.append("")
             if rw.get("key_prior_works"):
+                parts.append("### Key Prior Works\n")
                 for pw in rw["key_prior_works"]:
-                    parts.append(f"- **{pw.get('work', '')}**")
-                    if pw.get("relationship"):
-                        parts.append(f"  - {pw['relationship']}")
+                    rel = pw.get("relationship", "")
+                    rel_tag = f" `{rel}`" if rel else ""
+                    parts.append(f"- **{pw.get('work', '')}**{rel_tag}")
+                    if pw.get("comparison"):
+                        parts.append(f"  - {pw['comparison']}")
+                parts.append("")
+            if rw.get("coverage_gaps"):
+                parts.append("### Coverage Gaps\n")
+                for gap in rw["coverage_gaps"]:
+                    parts.append(f"- {gap}")
                 parts.append("")
             if rw.get("positioning"):
                 parts.append(f"**Positioning:** {rw['positioning']}\n")
 
-        # --- Glossary ---
+        # ═══════════════════════════════════════════════════════════════
+        # GLOSSARY
+        # ═══════════════════════════════════════════════════════════════
         gl = review.sections.get(ReviewSectionType.GLOSSARY, {})
         if gl and gl.get("terms"):
             parts.append("## Glossary\n")
-            parts.append("| Term | Definition |")
-            parts.append("|------|-----------|")
+            parts.append("| Term | Category | Definition |")
+            parts.append("|:-----|:---------|:-----------|")
             for t in gl["terms"]:
                 term = t.get("term", "")
                 defn = t.get("definition", "").replace("|", "\\|")
-                parts.append(f"| **{term}** | {defn} |")
+                cat = t.get("category", "concept")
+                parts.append(f"| **{term}** | `{cat}` | {defn} |")
             parts.append("")
 
-        # --- Questions ---
+        # ═══════════════════════════════════════════════════════════════
+        # QUESTIONS FOR AUTHORS
+        # ═══════════════════════════════════════════════════════════════
         qs = review.sections.get(ReviewSectionType.QUESTIONS, {})
         if qs and qs.get("questions"):
-            parts.append("## Questions for Further Investigation\n")
-            for q in qs["questions"]:
+            parts.append("## Questions for Authors\n")
+            for i, q in enumerate(qs["questions"], 1):
                 qtype = q.get("type", "general")
-                parts.append(f"- **[{qtype}]** {q.get('question', '')}")
+                priority = q.get("priority", "")
+                priority_tag = f" `{priority}`" if priority else ""
+                parts.append(f"**Q{i} [{qtype}]{priority_tag}:** {q.get('question', '')}\n")
                 if q.get("motivation"):
-                    parts.append(f"  - *Motivation:* {q['motivation']}")
-            parts.append("")
+                    parts.append(f"*{q['motivation']}*\n")
+                if q.get("relevant_section"):
+                    parts.append(f"*Related to: {q['relevant_section']}*\n")
+
+        # ═══════════════════════════════════════════════════════════════
+        # READING GUIDE
+        # ═══════════════════════════════════════════════════════════════
+        rg = review.sections.get(ReviewSectionType.READING_GUIDE, {})
+        if rg:
+            parts.append("## Reading Guide\n")
+            time_est = rg.get("time_estimate_minutes", "")
+            difficulty = rg.get("difficulty_level", "")
+            if time_est or difficulty:
+                meta = []
+                if time_est:
+                    meta.append(f"~{time_est} min")
+                if difficulty:
+                    meta.append(difficulty)
+                parts.append(f"**{' | '.join(meta)}**\n")
+            if rg.get("essential_sections"):
+                parts.append("**Must-read sections:**\n")
+                for s in rg["essential_sections"]:
+                    parts.append(f"- {s}")
+                parts.append("")
+            if rg.get("skip_if_familiar"):
+                parts.append("**Skip if familiar with the domain:**\n")
+                for s in rg["skip_if_familiar"]:
+                    parts.append(f"- {s}")
+                parts.append("")
+            if rg.get("suggested_reading_order"):
+                order = " -> ".join(rg["suggested_reading_order"])
+                parts.append(f"**Suggested reading order:** {order}\n")
+            if rg.get("key_figures"):
+                parts.append("**Key figures:** " + ", ".join(rg["key_figures"]) + "\n")
+            if rg.get("key_tables"):
+                parts.append("**Key tables:** " + ", ".join(rg["key_tables"]) + "\n")
+            if rg.get("prerequisite_knowledge"):
+                parts.append("**Prerequisites:**\n")
+                for p in rg["prerequisite_knowledge"]:
+                    parts.append(f"- {p}")
+                parts.append("")
 
-        # --- Footer ---
+        # ═══════════════════════════════════════════════════════════════
+        # FOOTER
+        # ═══════════════════════════════════════════════════════════════
         parts.append("---")
         parts.append(
-            f"*Generated by arXiv Explorer | {review.generated_at.strftime('%Y-%m-%d %H:%M')}*"
+            f"*Generated by arXiv Explorer | "
+            f"{review.generated_at.strftime('%Y-%m-%d %H:%M')} | "
+            f"{len(review.sections)}/{len(ReviewSectionType)} sections*"
         )
 
         markdown = "\n".join(parts)
@@ -537,6 +847,7 @@ def _build_prompt(
     ) -> str:
         """Build the AI prompt for a given section type."""
         header = (
+            f"{self._REVIEWER_PERSONA}"
             f"Paper: {paper.title}\n"
             f"Authors: {', '.join(paper.authors[:10])}\n"
             f"arXiv ID: {paper.arxiv_id}\n"
@@ -553,10 +864,13 @@ def _build_prompt(
             ReviewSectionType.FIGURES: self._prompt_figures,
             ReviewSectionType.TABLES: self._prompt_tables,
             ReviewSectionType.EXPERIMENTAL_RESULTS: self._prompt_experiments,
+            ReviewSectionType.REPRODUCIBILITY: self._prompt_reproducibility,
             ReviewSectionType.STRENGTHS_WEAKNESSES: self._prompt_strengths_weaknesses,
+            ReviewSectionType.IMPACT_SIGNIFICANCE: self._prompt_impact_significance,
             ReviewSectionType.RELATED_WORK: self._prompt_related_work,
             ReviewSectionType.GLOSSARY: self._prompt_glossary,
             ReviewSectionType.QUESTIONS: self._prompt_questions,
+            ReviewSectionType.READING_GUIDE: self._prompt_reading_guide,
         }
 
         return builders[section_type](
@@ -575,13 +889,24 @@ def _prompt_executive_summary(self, header, full_text_md, **_) -> str:
 
 {context_block}
 
-Analyze this paper and provide an executive summary.
+Provide an executive summary as if writing the opening paragraph of a peer review.
+Your summary must demonstrate that you understand the paper's core argument, not just its topic.
+
+EVALUATION CRITERIA:
+- The TL;DR should convey what was done, why, and the key result — a reader should be able to decide whether to read the paper from this alone.
+- The research question must be stated as a precise, answerable question, not a vague topic.
+- The novelty claim must distinguish what is genuinely new vs. what is incremental improvement.
+- The verdict must be an honest, balanced assessment — not sales copy.
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
-    "tldr": "3-5 sentence TL;DR capturing the core contribution and result",
-    "research_question": "The main research question addressed",
-    "approach_summary": "1-2 sentence summary of the approach",
-    "main_result": "The most important quantitative or qualitative result"
+    "tldr": "3-5 sentence TL;DR that captures the problem, approach, key result, and why it matters",
+    "research_question": "The precise research question or hypothesis this paper addresses",
+    "approach_summary": "1-2 sentence summary of the technical approach and its key innovation",
+    "main_result": "The single most important result, stated with specific numbers/metrics where available",
+    "novelty_claim": "What the paper claims as its novel contribution, distinguished from prior work",
+    "target_audience": "Who would benefit most from reading this paper (specific research communities or practitioners)",
+    "one_sentence_verdict": "Single sentence balanced assessment capturing both promise and limitations"
 }}"""
 
     def _prompt_contributions(self, header, full_text_md, **_) -> str:
@@ -591,14 +916,23 @@ def _prompt_contributions(self, header, full_text_md, **_) -> str:
 
 {context_block}
 
-List the key contributions of this paper.
+Identify and evaluate the key contributions of this paper. For each contribution:
+- Distinguish between claimed contributions and actually demonstrated ones.
+- Assess novelty relative to the state of the art — is this genuinely new, or an incremental refinement?
+- Evaluate how well the paper supports each claim with evidence (experiments, proofs, ablations).
+
+DO NOT simply restate the paper's own claims. Critically evaluate whether the evidence supports them.
+Limit to 3-6 contributions, ranked by significance.
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
     "contributions": [
         {{
-            "contribution": "Description of the contribution",
+            "contribution": "Precise description of the contribution",
             "type": "theoretical|methodological|empirical|system|dataset",
-            "significance": "Why this matters"
+            "novelty": "incremental|moderate|significant — justify in one phrase",
+            "significance": "Why this contribution matters to the field",
+            "evidence_strength": "How well the paper supports this claim (strong/moderate/weak, with brief justification)"
         }}
     ]
 }}"""
@@ -617,14 +951,20 @@ def _prompt_section_summaries(self, header, paper_sections, **_) -> str:
 Paper sections:
 {sections_text if sections_text else "(Full text not available -- analyze based on abstract)"}
 
-For each major section of the paper, provide a summary paragraph.
+Provide a structured summary of each major section. For each section:
+- Summarize the content in your own words (do not copy verbatim).
+- Identify 2-4 key points that carry the argument forward.
+- Assess how well the section fulfills its role in the paper's overall narrative.
+- Rate the clarity: is the section well-written, or does it need improvement?
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
     "sections": [
         {{
-            "heading": "Section heading as it appears",
-            "summary": "2-4 sentence summary of this section",
-            "key_points": ["point 1", "point 2"]
+            "heading": "Section heading as it appears in the paper",
+            "summary": "2-4 sentence summary capturing the section's purpose and content",
+            "key_points": ["key point 1", "key point 2"],
+            "clarity_assessment": "clear|mostly_clear|needs_improvement — brief justification"
         }}
     ]
 }}"""
@@ -651,19 +991,37 @@ def _prompt_methodology(self, header, paper_sections, full_text_md, **_) -> str:
 Relevant sections:
 {method_text if method_text else "(Analyze methodology from abstract)"}
 
-Provide a detailed methodology analysis.
+Provide a rigorous methodology analysis through the lens of a peer reviewer.
+
+EVALUATION CRITERIA:
+- Is the method well-motivated? Does the paper justify why this approach over alternatives?
+- Is each step clearly defined with enough detail for replication?
+- Are assumptions stated explicitly? How reasonable are they?
+- What are the methodological limitations the authors may not acknowledge?
+- Is the computational complexity discussed?
+
+For each methodological step, assess whether it is genuinely novel, an adaptation of existing work, or standard practice. Identify the specific prior work it builds upon where applicable.
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
-    "overview": "High-level description of the methodology",
+    "overview": "High-level description of the methodology and its key innovation",
     "steps": [
         {{
             "step_name": "Name of this step/component",
-            "description": "Detailed explanation",
-            "novelty": "What is novel about this step (or 'standard' if not novel)"
+            "description": "Detailed technical explanation",
+            "novelty": "novel|adaptation|standard — cite the specific prior work if adaptation/standard",
+            "justification": "Why this design choice was made (as stated or inferred)"
+        }}
+    ],
+    "assumptions": [
+        {{
+            "assumption": "Description of the assumption",
+            "validity": "How reasonable this assumption is and when it might break",
+            "impact_if_violated": "Consequence if this assumption does not hold"
         }}
     ],
-    "assumptions": ["Key assumption 1", "Key assumption 2"],
-    "complexity_notes": "Computational complexity or scalability notes if mentioned"
+    "limitations": ["Methodological limitation not acknowledged by the authors"],
+    "complexity_notes": "Computational/memory complexity and scalability analysis"
 }}"""
 
     def _prompt_math(self, header, math_blocks, **_) -> str:
@@ -677,15 +1035,25 @@ def _prompt_math(self, header, math_blocks, **_) -> str:
 Key equations found:
 {math_text if math_text else "(No display equations detected)"}
 
-Explain the key mathematical formulations in plain language.
+Analyze the key mathematical formulations. For each equation:
+- Provide the original LaTeX (preserve notation exactly).
+- Explain in plain language what the equation computes and why.
+- Identify which variables are inputs, outputs, and hyperparameters.
+- Assess correctness: are there dimensional inconsistencies, missing terms, or notation ambiguities?
+- Explain its role in the paper's argument — is this a definition, a derivation step, or a key result?
+
+Focus on the 5-10 most important equations. Skip trivial definitions.
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
     "formulations": [
         {{
-            "equation_label": "Equation number or name",
-            "latex": "Original LaTeX",
-            "plain_language": "What this equation means in plain English",
-            "role": "How it fits into the overall methodology"
+            "equation_label": "Equation number or descriptive name (e.g., 'Eq. 3 — Loss function')",
+            "latex": "Original LaTeX notation",
+            "plain_language": "What this equation computes, explained for a graduate student",
+            "variables": "Key variables and their meanings",
+            "role": "definition|derivation_step|key_result|constraint|objective — how it fits the argument",
+            "correctness_note": "Any concerns about correctness, notation, or missing terms (or 'appears sound')"
         }}
     ]
 }}"""
@@ -702,14 +1070,22 @@ def _prompt_figures(self, header, figure_captions, **_) -> str:
 Figure captions and context:
 {figs_text if figs_text else "(No figures detected)"}
 
-Describe each figure based on its caption and surrounding context.
+Analyze each figure as a reviewer would. For each figure:
+- Describe what the figure shows (chart type, axes, data series).
+- Assess whether the caption is self-contained — could a reader understand the figure from the caption alone?
+- Evaluate the figure's role: does it support a specific claim in the text? Which one?
+- Note any presentation issues: missing labels, unclear legends, inappropriate chart types, etc.
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
     "figures": [
         {{
             "figure_id": "1",
-            "description": "What this figure likely shows based on caption and context",
-            "significance": "Why this figure is important for understanding the paper"
+            "description": "What the figure shows — chart type, axes, key data points",
+            "claim_supported": "Which specific claim or result this figure supports",
+            "caption_quality": "Is the caption self-contained? What's missing?",
+            "presentation_issues": "Any issues with readability, labeling, or chart type choice (or 'none')",
+            "significance": "How critical this figure is to the paper's argument (essential|supporting|supplementary)"
         }}
     ]
 }}"""
@@ -726,15 +1102,22 @@ def _prompt_tables(self, header, table_content, **_) -> str:
 Tables found in paper:
 {tables_text if tables_text else "(No tables detected)"}
 
-Analyze each table and describe its contents and significance.
+Analyze each table with the rigor of a peer reviewer. For each table:
+- Describe what the table presents (comparison, ablation, dataset statistics, etc.).
+- Identify the key takeaway — what is the most important result in this table?
+- Check for issues: missing baselines, unfair comparisons, cherry-picked metrics, or inconsistencies.
+- Note whether the table is self-contained with its caption.
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
     "tables": [
         {{
             "table_id": "1",
-            "caption": "Original caption",
-            "description": "What this table shows",
-            "key_findings": "Notable results or patterns in the data"
+            "caption": "Original caption text",
+            "table_type": "comparison|ablation|statistics|configuration|other",
+            "description": "What the table presents and how to read it",
+            "key_findings": "The most important result or pattern, with specific numbers",
+            "issues": "Any concerns: missing baselines, unfair comparisons, incomplete data (or 'none')"
         }}
     ]
 }}"""
@@ -767,15 +1150,75 @@ def _prompt_experiments(self, header, paper_sections, table_content, **_) -> str
 Result tables:
 {tables_summary}
 
-Analyze the experimental setup and results.
+Provide a rigorous analysis of the experimental evaluation as a peer reviewer.
+
+EVALUATION CRITERIA:
+- Are the datasets appropriate for the claims being made? Are they standard benchmarks?
+- Are the baselines fair and up-to-date? Are any important baselines missing?
+- Are the metrics standard for this task? Are they sufficient to support the conclusions?
+- Is there statistical significance reporting (error bars, confidence intervals, multiple runs)?
+- Are ablation studies present and do they isolate the contribution of each component?
+- Are there experiments that should have been included but weren't?
+
+Be specific: cite numbers from the tables where available.
+
+IMPORTANT: Respond ONLY with valid JSON, no other text.
+{{
+    "datasets": ["Dataset name — brief description of why it's appropriate or concerning"],
+    "baselines": ["Baseline method — is it a fair, up-to-date comparison?"],
+    "metrics": ["Metric — appropriate for the task?"],
+    "main_results": "Summary of quantitative results with specific numbers where available",
+    "statistical_rigor": "Assessment of statistical methodology: error bars, significance tests, number of runs",
+    "ablation_studies": "Summary of ablation studies and whether they sufficiently isolate contributions",
+    "missing_experiments": ["Experiment that would strengthen the paper but is absent"],
+    "notable_findings": ["Surprising or particularly strong/weak finding"]
+}}"""
+
+    def _prompt_reproducibility(self, header, paper_sections, full_text_md, **_) -> str:
+        method_text = ""
+        if paper_sections:
+            method_keywords = [
+                "method",
+                "experiment",
+                "implementation",
+                "setup",
+                "training",
+                "hyperparameter",
+                "appendix",
+            ]
+            for heading, content in paper_sections.items():
+                if any(kw in heading.lower() for kw in method_keywords):
+                    method_text += f"\n### {heading}\n{content[:1500]}\n"
+        if not method_text and full_text_md:
+            method_text = full_text_md[:4000]
+
+        return f"""{header}
+
+Relevant sections:
+{method_text if method_text else "(Analyze from abstract)"}
+
+Assess the reproducibility of this work. This is one of the most important aspects of scientific rigor.
+
+EVALUATE EACH DIMENSION:
+1. **Code**: Is code provided, promised, or entirely absent? Is it a link to a repo, pseudocode, or nothing?
+2. **Data**: Are datasets publicly available? Are preprocessing steps documented?
+3. **Method clarity**: Could an expert in the field reimplement the method from the paper alone?
+4. **Hyperparameters**: Are all hyperparameters, training details, and architectural choices specified?
+5. **Compute**: Are computational requirements (GPU type, training time, memory) reported?
+6. **Random seeds & variance**: Are experiments run with multiple seeds? Is variance reported?
+
+Assign a reproducibility score based on the NeurIPS reproducibility checklist standards.
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
-    "datasets": ["Dataset names used"],
-    "baselines": ["Baseline methods compared against"],
-    "metrics": ["Evaluation metrics used"],
-    "main_results": "Summary of main quantitative results",
-    "ablation_studies": "Summary of ablation studies if present",
-    "notable_findings": ["Finding 1", "Finding 2"]
+    "code_availability": "available_with_link|promised|pseudocode_only|not_mentioned — include URL if available",
+    "data_availability": "public_benchmark|available_with_link|described_but_not_shared|proprietary|not_mentioned",
+    "methodology_clarity": "sufficient_for_reimplementation|mostly_clear_with_gaps|insufficient — describe what's missing",
+    "hyperparameter_reporting": "complete|mostly_complete|significant_gaps|minimal — list what's missing",
+    "computational_requirements": "fully_reported|partially_reported|not_mentioned — include specifics if available",
+    "variance_reporting": "multiple_seeds_with_error_bars|single_run_acknowledged|not_addressed",
+    "reproducibility_score": "high|medium|low",
+    "missing_details": ["Specific detail needed for reproducibility that is absent from the paper"]
 }}"""
 
     def _prompt_strengths_weaknesses(self, header, full_text_md, **_) -> str:
@@ -788,22 +1231,83 @@ def _prompt_strengths_weaknesses(self, header, full_text_md, **_) -> str:
 
 {context_block}
 
-Provide a critical analysis of the paper's strengths and weaknesses.
+Write a structured peer review covering strengths and weaknesses, as if submitting a review to a top-tier venue.
+
+GUIDELINES:
+- Every point must cite specific evidence from the paper (section, equation, figure, or table number).
+- Categorize each point: technical correctness, novelty, presentation quality, experimental rigor, reproducibility, or scope.
+- Assign severity: minor (cosmetic or easily fixable), moderate (weakens but doesn't invalidate), major (significant concern), critical (potentially invalidating).
+- Strengths should be substantive, not generic ("well-written" alone is not a strength).
+- Weaknesses should be constructive: suggest how each could be addressed.
+- Provide 3-6 strengths and 3-6 weaknesses. Do NOT pad the list with trivial points.
+
+Finally, provide an overall recommendation as a reviewer would:
+- strong_accept: Excellent, top 5% of submissions
+- accept: Clear accept, solid contribution
+- weak_accept: Leans positive, minor concerns
+- borderline: Could go either way
+- weak_reject: Leans negative, significant concerns
+- reject: Below threshold for the venue
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
     "strengths": [
         {{
-            "point": "Strength description",
-            "evidence": "Supporting evidence or reasoning"
+            "point": "Concise strength statement",
+            "evidence": "Specific evidence from the paper (cite section/figure/table)",
+            "category": "technical|novelty|presentation|experimental|reproducibility|scope",
+            "significance": "minor|moderate|major"
         }}
     ],
     "weaknesses": [
         {{
-            "point": "Weakness description",
-            "evidence": "Supporting evidence or reasoning"
+            "point": "Concise weakness statement",
+            "evidence": "Specific evidence from the paper",
+            "category": "technical|novelty|presentation|experimental|reproducibility|scope",
+            "severity": "minor|moderate|major|critical",
+            "suggestion": "How this weakness could be addressed"
         }}
     ],
-    "overall_assessment": "1-2 sentence overall assessment"
+    "overall_assessment": "2-3 sentence balanced assessment that weighs strengths against weaknesses",
+    "recommendation": "strong_accept|accept|weak_accept|borderline|weak_reject|reject",
+    "confidence": "high|medium|low — how confident you are in this assessment"
+}}"""
+
+    def _prompt_impact_significance(self, header, full_text_md, **_) -> str:
+        context = ""
+        if full_text_md:
+            # Head and tail slices stand in for the intro and conclusion
+            context = full_text_md[:2500] + "\n...\n" + full_text_md[-2500:]
+        context_block = f"Paper content:\n{context}" if context else ""
+
+        return f"""{header}
+
+{context_block}
+
+Assess the broader impact and significance of this work. Think beyond the immediate technical contribution.
+
+EVALUATE:
+1. **Field impact**: How does this advance the state of the art? Is it opening a new direction or refining an existing one?
+2. **Practical applications**: Could this work be deployed in real systems? What are the barriers?
+3. **Broader impact**: Are there societal implications (positive or negative)?
+4. **Limitations of impact**: What factors limit the paper's influence (narrow scope, strong assumptions, limited evaluation)?
+5. **Future directions**: What research does this naturally lead to?
+
+Be realistic — most papers are incremental improvements, and that's fine. But clearly distinguish between truly significant work and solid-but-incremental contributions.
+
+IMPORTANT: Respond ONLY with valid JSON, no other text.
+{{
+    "field_impact": "How this work advances the field — be specific about what changes",
+    "practical_applications": ["Concrete practical application or use case"],
+    "broader_impact": "Societal or cross-disciplinary implications, if any",
+    "limitations_of_impact": "What limits the paper's real-world influence",
+    "future_directions": [
+        {{
+            "direction": "Specific future research direction",
+            "potential": "Why this direction is promising"
+        }}
+    ],
+    "significance_rating": "transformative|significant|solid_contribution|incremental|limited"
 }}"""
 
     def _prompt_related_work(self, header, paper_sections, **_) -> str:
@@ -825,17 +1329,33 @@ def _prompt_related_work(self, header, paper_sections, **_) -> str:
 Related work sections:
 {rw_text if rw_text else "(Analyze related work context from abstract)"}
 
-Summarize the related work landscape and how this paper positions itself.
+Analyze the related work and the paper's positioning within the field.
+
+EVALUATION CRITERIA:
+- Does the paper adequately cover the relevant literature? Are there notable omissions?
+- Is the comparison to prior work fair and accurate?
+- Does the paper clearly articulate how it differs from and improves upon existing approaches?
+- Are there concurrent works that should be acknowledged?
+
+Group related works by research area/theme rather than listing them sequentially.
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
-    "research_areas": ["Area 1", "Area 2"],
+    "research_areas": [
+        {{
+            "area": "Research area or theme name",
+            "description": "Brief description of this line of work and its relevance"
+        }}
+    ],
     "key_prior_works": [
         {{
-            "work": "Author et al. (Year) - brief description",
-            "relationship": "How this paper relates to or differs from it"
+            "work": "Author et al. (Year) — brief description",
+            "relationship": "extends|improves_upon|alternative_to|builds_on|concurrent_with",
+            "comparison": "How this paper specifically differs from or improves upon this work"
         }}
     ],
-    "positioning": "How the paper positions itself within the field"
+    "coverage_gaps": ["Important related work that appears to be missing from the paper's discussion"],
+    "positioning": "How the paper positions itself — is this positioning fair and well-supported?"
 }}"""
 
     def _prompt_glossary(self, header, full_text_md, **_) -> str:
@@ -846,14 +1366,19 @@ def _prompt_glossary(self, header, full_text_md, **_) -> str:
 
 {context_block}
 
-Extract key technical terms and provide definitions.
+Extract key technical terms, acronyms, and domain-specific notation used in this paper.
+Focus on terms that a reader from a related (but not identical) field would need defined.
+Skip universally known terms (e.g., "neural network", "gradient descent") unless the paper uses them with a non-standard meaning.
+Include mathematical notation where the paper defines symbols with specific meaning.
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
     "terms": [
         {{
-            "term": "Technical term",
-            "definition": "Clear definition as used in this paper",
-            "first_occurrence": "Section where it first appears (if known)"
+            "term": "Technical term or symbol",
+            "definition": "Clear definition as used specifically in this paper",
+            "first_occurrence": "Section where it first appears (if known)",
+            "category": "concept|acronym|notation|metric"
         }}
     ]
 }}"""
@@ -868,18 +1393,68 @@ def _prompt_questions(self, header, full_text_md, **_) -> str:
 
 {context_block}
 
-Suggest questions for further investigation based on this paper.
+Generate substantive questions that a thoughtful reviewer or reader would ask.
+
+Include a mix of:
+- **Clarification questions**: Where the paper is ambiguous or under-specified
+- **Methodological questions**: About design choices, alternatives, or limitations
+- **Extension questions**: How this work could be extended or applied to new domains
+- **Challenge questions**: Potential counterarguments or edge cases the authors should address
+
+Each question should be specific enough that the authors could write a concrete response. Avoid vague questions like "Can you elaborate on X?"
+
+Provide 5-8 questions, prioritized by importance.
+
 IMPORTANT: Respond ONLY with valid JSON, no other text.
 {{
     "questions": [
         {{
-            "question": "The question",
-            "motivation": "Why this question is interesting or important",
-            "type": "clarification|extension|limitation|application"
+            "question": "Specific, answerable question",
+            "motivation": "Why this question matters — what gap or concern it addresses",
+            "type": "clarification|methodological|extension|challenge",
+            "priority": "high|medium|low",
+            "relevant_section": "Which section of the paper this question relates to"
         }}
     ]
 }}"""
 
+    def _prompt_reading_guide(self, header, full_text_md, paper_sections, **_) -> str:
+        sections_list = ""
+        if paper_sections:
+            sections_list = ", ".join(
+                h for h in paper_sections.keys() if h != "_preamble"
+            )
+
+        context = full_text_md[:3000] if full_text_md else ""
+        context_block = f"Paper structure:\n{context}" if context else ""
+
+        return f"""{header}
+
+{context_block}
+Paper sections: {sections_list if sections_list else "(not available)"}
+
+Create a reading guide for this paper. The goal is to help a busy researcher decide how to invest their reading time.
+
+Consider:
+- Which sections are essential to understand the core contribution?
+- Which sections can be skipped by someone already familiar with the domain?
+- What is the optimal reading order (which may differ from the paper's linear order)?
+- Which figures and tables convey the most information?
+- What background knowledge is assumed?
+- How long should a thorough read take?
+
+IMPORTANT: Respond ONLY with valid JSON, no other text.
+{{
+    "essential_sections": ["Section names that are must-read to understand the paper"],
+    "skip_if_familiar": ["Sections an expert can safely skip"],
+    "key_figures": ["Figure N — brief reason why it's important"],
+    "key_tables": ["Table N — brief reason why it's important"],
+    "prerequisite_knowledge": ["Background knowledge or papers assumed by the authors"],
+    "suggested_reading_order": ["Optimal section order for maximum understanding"],
+    "time_estimate_minutes": 30,
+    "difficulty_level": "introductory|intermediate|advanced|expert"
+}}"""
+
     # ── AI Invocation ─────────────────────────────────────────────────
 
     def _invoke_ai(self, prompt: str) -> dict | None:
@@ -975,6 +1550,15 @@ def _empty_section_data(section_type: ReviewSectionType) -> dict:
             ReviewSectionType.FIGURES: {"figures": []},
             ReviewSectionType.TABLES: {"tables": []},
             ReviewSectionType.MATH_FORMULATIONS: {"formulations": []},
+            ReviewSectionType.REPRODUCIBILITY: {
+                "code_availability": "Unknown (full text not available)",
+                "data_availability": "Unknown",
+                "methodology_clarity": "Cannot assess without full text",
+                "hyperparameter_reporting": "Cannot assess without full text",
+                "computational_requirements": "Not mentioned",
+                "reproducibility_score": "unknown",
+                "missing_details": [],
+            },
         }
         return empty_maps.get(section_type, {})
 
diff --git a/tests/test_review_service.py b/tests/test_review_service.py
index 4d5a17d..b2c6a65 100644
--- a/tests/test_review_service.py
+++ b/tests/test_review_service.py
@@ -286,7 +286,7 @@ def test_renders_header(self, review_service, sample_paper):
         md = review_service.render_markdown(review)
         assert sample_paper.title in md
         assert sample_paper.arxiv_id in md
-        assert "Authors:" in md
+        assert "**Authors**" in md
 
     def test_renders_executive_summary(self, review_service, sample_paper):
         review = self._make_review(
@@ -367,11 +367,11 @@ def test_renders_source_type(self, review_service, sample_paper):
         review = self._make_review(sample_paper)
         review.source_type = "full_text"
         md = review_service.render_markdown(review)
-        assert "Full text analysis" in md
+        assert "Full text" in md
 
         review.source_type = "abstract"
         md = review_service.render_markdown(review)
-        assert "Abstract-only analysis" in md
+        assert "Abstract only" in md
 
 
 # ── Model Tests ───────────────────────────────────────────────────────
@@ -463,11 +463,29 @@ def _mock_responses(self):
                 "ablation_studies": "",
                 "notable_findings": [],
             },
+            ReviewSectionType.REPRODUCIBILITY: {
+                "code_availability": "not_mentioned",
+                "data_availability": "not_mentioned",
+                "methodology_clarity": "insufficient",
+                "hyperparameter_reporting": "minimal",
+                "computational_requirements": "not_mentioned",
+                "variance_reporting": "not_addressed",
+                "reproducibility_score": "low",
+                "missing_details": [],
+            },
             ReviewSectionType.STRENGTHS_WEAKNESSES: {
                 "strengths": [],
                 "weaknesses": [],
                 "overall_assessment": "",
             },
+            ReviewSectionType.IMPACT_SIGNIFICANCE: {
+                "field_impact": "",
+                "practical_applications": [],
+                "broader_impact": "",
+                "limitations_of_impact": "",
+                "future_directions": [],
+                "significance_rating": "incremental",
+            },
             ReviewSectionType.RELATED_WORK: {
                 "research_areas": [],
                 "key_prior_works": [],
@@ -475,6 +493,16 @@ def _mock_responses(self):
             },
             ReviewSectionType.GLOSSARY: {"terms": []},
             ReviewSectionType.QUESTIONS: {"questions": []},
+            ReviewSectionType.READING_GUIDE: {
+                "essential_sections": [],
+                "skip_if_familiar": [],
+                "key_figures": [],
+                "key_tables": [],
+                "prerequisite_knowledge": [],
+                "suggested_reading_order": [],
+                "time_estimate_minutes": 30,
+                "difficulty_level": "intermediate",
+            },
         }
 
     def test_generates_with_abstract_only(self, tmp_config: Config, sample_paper):
@@ -522,8 +550,8 @@ def tracking_invoke(prompt):
         assert review.sections[ReviewSectionType.EXECUTIVE_SUMMARY]["tldr"] == "Cached"
         # AI should have been called for remaining sections, minus:
         # - 1 cached (executive_summary)
-        # - 3 empty sections in abstract-only mode (figures, tables, math)
-        assert len(invoke_calls) == len(ReviewSectionType) - 1 - 3
+        # - 4 empty sections in abstract-only mode (figures, tables, math, reproducibility)
+        assert len(invoke_calls) == len(ReviewSectionType) - 1 - 4
 
     def test_force_regenerates_cached(self, tmp_config: Config, sample_paper):
         service = PaperReviewService()
@@ -549,9 +577,9 @@ def mock_invoke(prompt):
         service._invoke_ai = mock_invoke
 
         review = service.generate_review(sample_paper, force=True)
-        # With force=True, AI called for all sections except 3 empty
-        # (figures, tables, math) which get empty data in abstract-only mode
-        assert call_count[0] == len(ReviewSectionType) - 3
+        # With force=True, AI called for all sections except 4 empty
+        # (figures, tables, math, reproducibility) which get empty data in abstract-only mode
+        assert call_count[0] == len(ReviewSectionType) - 4
         assert review.sections[ReviewSectionType.EXECUTIVE_SUMMARY]["tldr"] == "Test"
 
     def test_callbacks_invoked(self, tmp_config: Config, sample_paper):

From 89696643db685dcc0909c1912470457b2f14c201 Mon Sep 17 00:00:00 2001
From: axect <axect.tg@proton.me>
Date: Tue, 7 Apr 2026 15:11:39 +0800
Subject: [PATCH 2/5] feat: auto-check for git updates on CLI startup

- Add update_checker module: git fetch, compare HEAD vs remote, detect conflicts
- Prompt user with y/n when updates are available, show conflict warnings
- 12-hour throttling to avoid repeated fetches
- Integrate into @app.callback() with --no-update-check escape hatch
- 19 tests covering throttling, conflict detection, network failure
---
 src/arxiv_explorer/cli/main.py            |  65 ++++++
 src/arxiv_explorer/core/update_checker.py | 196 +++++++++++++++++
 tests/test_update_checker.py              | 253 ++++++++++++++++++++++
 3 files changed, 514 insertions(+)
 create mode 100644 src/arxiv_explorer/core/update_checker.py
 create mode 100644 tests/test_update_checker.py

diff --git a/src/arxiv_explorer/cli/main.py b/src/arxiv_explorer/cli/main.py
index 97d3323..752ebc6 100644
--- a/src/arxiv_explorer/cli/main.py
+++ b/src/arxiv_explorer/cli/main.py
@@ -4,6 +4,7 @@
 from rich.console import Console
 
 from ..core.database import init_db
+from ..core.update_checker import UpdateStatus, check_for_updates, pull_updates
 
 app = typer.Typer(
     name="axp",
@@ -22,6 +23,58 @@ def version_callback(value: bool):
         raise typer.Exit()
 
 
+def _prompt_update(status: UpdateStatus) -> None:
+    """Show the pending update, warn about likely conflicts, and offer to pull now."""
+    console.print(
+        f"\n[bold yellow]Update available[/bold yellow]: "
+        f"{status.behind_count} new commit{'s' if status.behind_count != 1 else ''} "
+        f"on remote"
+    )
+
+    if status.ahead_count > 0:
+        console.print(
+            f"[dim](local is also {status.ahead_count} commit{'s' if status.ahead_count != 1 else ''} "
+            f"ahead of remote)[/dim]"
+        )
+
+    # Show changed files summary
+    if status.changed_files:
+        n = len(status.changed_files)
+        console.print(f"[dim]Changed files: {n}[/dim]")
+
+    # Warn about conflicts
+    if status.conflict_files:
+        console.print(
+            f"\n[bold red]Warning:[/bold red] "
+            f"The following locally modified files also changed on remote:"
+        )
+        for f in status.conflict_files:
+            console.print(f"  [red]- {f}[/red]")
+        console.print(
+            "[yellow]Pulling may cause merge conflicts. "
+            "Consider committing or stashing your local changes first.[/yellow]\n"
+        )
+
+    try:
+        answer = typer.prompt("Update now? [y/n]", default="n")
+    except (typer.Abort, EOFError, KeyboardInterrupt):  # click raises Abort on Ctrl-C/EOF
+        console.print()
+        return
+
+    if answer.strip().lower() in ("y", "yes"):
+        console.print("[dim]Pulling updates...[/dim]")
+        success, message = pull_updates()
+        if success:
+            console.print(f"[green]Updated successfully.[/green] {message}")
+            console.print(
+                "[yellow]Note: if dependencies changed, run 'uv sync' to update them.[/yellow]\n"
+            )
+        else:
+            console.print(f"[red]Update failed:[/red] {message}\n")
+    else:
+        console.print("[dim]Skipped.[/dim]\n")
+
+
 @app.callback()
 def main(
     version: bool = typer.Option(
@@ -32,11 +85,23 @@ def main(
         is_eager=True,
         help="Show version",
     ),
+    no_update_check: bool = typer.Option(
+        False,
+        "--no-update-check",
+        hidden=True,
+        help="Skip update check",
+    ),
 ):
     """arXiv Explorer - Personalized paper recommendation system."""
     # Initialize DB
     init_db()
 
+    # Check for git updates (throttled, silent on failure)
+    if not no_update_check:
+        status = check_for_updates()
+        if status and status.has_update:
+            _prompt_update(status)
+
 
 # Import and register subcommands
 from . import config, daily, export, lists, notes, preferences, review, search  # noqa: E402
diff --git a/src/arxiv_explorer/core/update_checker.py b/src/arxiv_explorer/core/update_checker.py
new file mode 100644
index 0000000..1c4665e
--- /dev/null
+++ b/src/arxiv_explorer/core/update_checker.py
@@ -0,0 +1,196 @@
+"""Git-based update checker with throttling and conflict detection."""
+
+import subprocess
+import time
+from dataclasses import dataclass
+from pathlib import Path
+
+
+# Throttle: check at most once per this many seconds
+CHECK_INTERVAL_SECONDS = 12 * 60 * 60  # 12 hours
+
+# Git command timeout
+GIT_TIMEOUT_SECONDS = 10
+
+
+@dataclass
+class UpdateStatus:
+    """Result of an update check."""
+
+    has_update: bool = False
+    local_ref: str = ""
+    remote_ref: str = ""
+    behind_count: int = 0
+    ahead_count: int = 0
+    changed_files: list[str] | None = None  # files changed on remote
+    conflict_files: list[str] | None = None  # locally modified files that remote also changed
+    error: str | None = None
+
+
+def _get_repo_root() -> Path | None:
+    """Find the git repo root from the package's installed location."""
+    # Walk up from this file to find .git
+    current = Path(__file__).resolve().parent
+    for _ in range(10):
+        if (current / ".git").exists():
+            return current
+        parent = current.parent
+        if parent == current:
+            break
+        current = parent
+    return None
+
+
+def _run_git(repo: Path, *args: str, timeout: int = GIT_TIMEOUT_SECONDS) -> str | None:
+    """Run a git command, return stdout or None on failure."""
+    try:
+        result = subprocess.run(
+            ["git", "-C", str(repo), *args],
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+        )
+        if result.returncode == 0:
+            return result.stdout.strip()
+        return None
+    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+        return None
+
+
+def _get_stamp_path(repo: Path) -> Path:
+    """Path to the last-check timestamp file."""
+    return repo / ".git" / "axp_update_check"
+
+
+def _should_check(repo: Path) -> bool:
+    """Return True if enough time has passed since the last check."""
+    stamp = _get_stamp_path(repo)
+    if not stamp.exists():
+        return True
+    try:
+        last = float(stamp.read_text().strip())
+        return (time.time() - last) >= CHECK_INTERVAL_SECONDS
+    except (ValueError, OSError):
+        return True
+
+
+def _touch_stamp(repo: Path) -> None:
+    """Record the current time as last-checked."""
+    try:
+        _get_stamp_path(repo).write_text(str(time.time()))
+    except OSError:
+        pass
+
+
+def _get_tracking_branch(repo: Path) -> str | None:
+    """Get the remote tracking branch for the current branch (e.g. 'origin/main')."""
+    branch = _run_git(repo, "rev-parse", "--abbrev-ref", "HEAD")
+    if not branch:
+        return None
+    upstream = _run_git(repo, "rev-parse", "--abbrev-ref", f"{branch}@{{upstream}}")
+    return upstream  # e.g. "origin/main"
+
+
+def check_for_updates(repo: Path | None = None, force: bool = False) -> UpdateStatus | None:
+    """Check if the remote has new commits.
+
+    Returns UpdateStatus if a check was performed, None if skipped (throttled or not a repo).
+    """
+    if repo is None:
+        repo = _get_repo_root()
+    if repo is None:
+        return None
+
+    if not force and not _should_check(repo):
+        return None
+
+    # Find tracking branch
+    upstream = _get_tracking_branch(repo)
+    if not upstream:
+        _touch_stamp(repo)
+        return None
+
+    remote_name = upstream.split("/")[0] if "/" in upstream else "origin"
+
+    # Fetch from remote (lightweight, no merge)
+    fetch_result = _run_git(repo, "fetch", remote_name, "--quiet")
+    if fetch_result is None:
+        # Network failure — silently skip
+        _touch_stamp(repo)
+        return UpdateStatus(error="fetch failed (network issue?)")
+
+    _touch_stamp(repo)
+
+    # Compare local HEAD vs upstream
+    local_ref = _run_git(repo, "rev-parse", "HEAD") or ""
+    remote_ref = _run_git(repo, "rev-parse", upstream) or ""
+
+    if local_ref == remote_ref:
+        return UpdateStatus(local_ref=local_ref, remote_ref=remote_ref)
+
+    # Count ahead/behind
+    rev_list = _run_git(repo, "rev-list", "--left-right", "--count", f"HEAD...{upstream}")
+    ahead, behind = 0, 0
+    if rev_list:
+        parts = rev_list.split()
+        if len(parts) == 2:
+            ahead, behind = int(parts[0]), int(parts[1])
+
+    if behind == 0:
+        # Local is ahead or in sync — no update needed
+        return UpdateStatus(
+            local_ref=local_ref,
+            remote_ref=remote_ref,
+            ahead_count=ahead,
+        )
+
+    # There are updates to pull — find which files changed
+    changed_raw = _run_git(repo, "diff", "--name-only", f"HEAD...{upstream}")
+    changed_files = changed_raw.splitlines() if changed_raw else []
+
+    # Detect potential conflicts: locally modified files that also changed on remote
+    local_modified_raw = _run_git(repo, "diff", "--name-only")
+    local_staged_raw = _run_git(repo, "diff", "--name-only", "--cached")
+
+    local_dirty: set[str] = set()
+    if local_modified_raw:
+        local_dirty.update(local_modified_raw.splitlines())
+    if local_staged_raw:
+        local_dirty.update(local_staged_raw.splitlines())
+
+    # Also check untracked files that overlap with remote changes
+    # (not common but possible if remote adds a file the user also created)
+    untracked_raw = _run_git(repo, "ls-files", "--others", "--exclude-standard")
+    if untracked_raw:
+        local_dirty.update(untracked_raw.splitlines())
+
+    conflict_files = sorted(local_dirty & set(changed_files))
+
+    return UpdateStatus(
+        has_update=True,
+        local_ref=local_ref,
+        remote_ref=remote_ref,
+        behind_count=behind,
+        ahead_count=ahead,
+        changed_files=changed_files,
+        conflict_files=conflict_files if conflict_files else None,
+    )
+
+
+def pull_updates(repo: Path | None = None) -> tuple[bool, str]:
+    """Run git pull. Returns (success, message)."""
+    if repo is None:
+        repo = _get_repo_root()
+    if repo is None:
+        return False, "Not a git repository"
+
+    result = _run_git(repo, "pull", "--ff-only", timeout=30)
+    if result is not None:
+        return True, result
+
+    # --ff-only failed, try normal pull
+    result = _run_git(repo, "pull", timeout=30)
+    if result is not None:
+        return True, result
+
+    return False, "git pull failed — you may need to resolve conflicts manually"
diff --git a/tests/test_update_checker.py b/tests/test_update_checker.py
new file mode 100644
index 0000000..df5efb6
--- /dev/null
+++ b/tests/test_update_checker.py
@@ -0,0 +1,253 @@
+"""Tests for the git update checker."""
+
+import time
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from arxiv_explorer.core.update_checker import (
+    CHECK_INTERVAL_SECONDS,
+    UpdateStatus,
+    _get_stamp_path,
+    _should_check,
+    _touch_stamp,
+    check_for_updates,
+    pull_updates,
+)
+
+
+@pytest.fixture
+def fake_repo(tmp_path: Path):
+    """Create a fake git directory structure."""
+    git_dir = tmp_path / ".git"
+    git_dir.mkdir()
+    return tmp_path
+
+
+# ── Throttling Tests ─────────────────────────────────────────────────
+
+
+class TestThrottling:
+    def test_should_check_no_stamp(self, fake_repo):
+        """First run should always check."""
+        assert _should_check(fake_repo) is True
+
+    def test_should_check_fresh_stamp(self, fake_repo):
+        """Just checked — should not check again."""
+        _touch_stamp(fake_repo)
+        assert _should_check(fake_repo) is False
+
+    def test_should_check_stale_stamp(self, fake_repo):
+        """Stamp older than interval — should check."""
+        stamp = _get_stamp_path(fake_repo)
+        stamp.write_text(str(time.time() - CHECK_INTERVAL_SECONDS - 1))
+        assert _should_check(fake_repo) is True
+
+    def test_should_check_corrupt_stamp(self, fake_repo):
+        """Corrupt stamp file — should check."""
+        stamp = _get_stamp_path(fake_repo)
+        stamp.write_text("not-a-number")
+        assert _should_check(fake_repo) is True
+
+    def test_touch_stamp_creates_file(self, fake_repo):
+        stamp = _get_stamp_path(fake_repo)
+        assert not stamp.exists()
+        _touch_stamp(fake_repo)
+        assert stamp.exists()
+        value = float(stamp.read_text())
+        assert abs(value - time.time()) < 5
+
+
+# ── UpdateStatus Tests ───────────────────────────────────────────────
+
+
+class TestUpdateStatus:
+    def test_defaults(self):
+        s = UpdateStatus()
+        assert s.has_update is False
+        assert s.behind_count == 0
+        assert s.conflict_files is None
+
+    def test_with_conflicts(self):
+        s = UpdateStatus(
+            has_update=True,
+            behind_count=3,
+            changed_files=["a.py", "b.py", "c.py"],
+            conflict_files=["b.py"],
+        )
+        assert s.has_update
+        assert s.conflict_files == ["b.py"]
+
+
+# ── check_for_updates Tests ─────────────────────────────────────────
+
+
+class TestCheckForUpdates:
+    def test_returns_none_when_no_repo(self):
+        """Not a git repo — should return None."""
+        result = check_for_updates(repo=Path("/tmp/definitely-not-a-repo"))
+        assert result is None
+
+    def test_returns_none_when_throttled(self, fake_repo):
+        """Recently checked — should skip."""
+        _touch_stamp(fake_repo)
+        result = check_for_updates(repo=fake_repo)
+        assert result is None
+
+    def test_force_bypasses_throttle(self, fake_repo):
+        """force=True should check even if recently checked."""
+        _touch_stamp(fake_repo)
+        with patch(
+            "arxiv_explorer.core.update_checker._get_tracking_branch",
+            return_value=None,
+        ), patch(
+            "arxiv_explorer.core.update_checker._run_git",
+            return_value=None,
+        ):
+            result = check_for_updates(repo=fake_repo, force=True)
+            # No tracking branch → returns None after fetch attempt
+            assert result is None
+
+    @patch("arxiv_explorer.core.update_checker._run_git")
+    @patch(
+        "arxiv_explorer.core.update_checker._get_tracking_branch",
+        return_value="origin/main",
+    )
+    def test_no_update_when_refs_match(self, _mock_track, mock_git, fake_repo):
+        """Same HEAD and remote — no update."""
+        same_ref = "abc123"
+        mock_git.side_effect = lambda repo, *args, **kw: {
+            ("fetch",): "",
+            ("rev-parse", "HEAD"): same_ref,
+            ("rev-parse", "origin/main"): same_ref,
+        }.get(args, "")
+
+        result = check_for_updates(repo=fake_repo, force=True)
+        assert result is not None
+        assert result.has_update is False
+
+    @patch("arxiv_explorer.core.update_checker._run_git")
+    @patch(
+        "arxiv_explorer.core.update_checker._get_tracking_branch",
+        return_value="origin/main",
+    )
+    def test_detects_update(self, _mock_track, mock_git, fake_repo):
+        """Remote is ahead — should detect update."""
+
+        def git_dispatcher(repo, *args, **kw):
+            key = args
+            return {
+                ("fetch", "origin", "--quiet"): "",
+                ("rev-parse", "HEAD"): "local111",
+                ("rev-parse", "origin/main"): "remote222",
+                ("rev-list", "--left-right", "--count", "HEAD...origin/main"): "0\t5",
+                ("diff", "--name-only", "HEAD...origin/main"): "src/a.py\nsrc/b.py",
+                ("diff", "--name-only"): "",
+                ("diff", "--name-only", "--cached"): "",
+                ("ls-files", "--others", "--exclude-standard"): "",
+            }.get(key)
+
+        mock_git.side_effect = git_dispatcher
+
+        result = check_for_updates(repo=fake_repo, force=True)
+        assert result is not None
+        assert result.has_update is True
+        assert result.behind_count == 5
+        assert result.changed_files == ["src/a.py", "src/b.py"]
+        assert result.conflict_files is None
+
+    @patch("arxiv_explorer.core.update_checker._run_git")
+    @patch(
+        "arxiv_explorer.core.update_checker._get_tracking_branch",
+        return_value="origin/main",
+    )
+    def test_detects_conflicts(self, _mock_track, mock_git, fake_repo):
+        """Locally modified file overlaps with remote change."""
+
+        def git_dispatcher(repo, *args, **kw):
+            key = args
+            return {
+                ("fetch", "origin", "--quiet"): "",
+                ("rev-parse", "HEAD"): "local111",
+                ("rev-parse", "origin/main"): "remote222",
+                ("rev-list", "--left-right", "--count", "HEAD...origin/main"): "0\t2",
+                ("diff", "--name-only", "HEAD...origin/main"): "src/a.py\nsrc/b.py",
+                ("diff", "--name-only"): "src/b.py",  # locally modified
+                ("diff", "--name-only", "--cached"): "",
+                ("ls-files", "--others", "--exclude-standard"): "",
+            }.get(key)
+
+        mock_git.side_effect = git_dispatcher
+
+        result = check_for_updates(repo=fake_repo, force=True)
+        assert result is not None
+        assert result.has_update is True
+        assert result.conflict_files == ["src/b.py"]
+
+    @patch("arxiv_explorer.core.update_checker._run_git")
+    @patch(
+        "arxiv_explorer.core.update_checker._get_tracking_branch",
+        return_value="origin/main",
+    )
+    def test_fetch_failure_returns_error(self, _mock_track, mock_git, fake_repo):
+        """Network failure during fetch — returns error status."""
+        mock_git.return_value = None  # all git commands fail
+
+        result = check_for_updates(repo=fake_repo, force=True)
+        assert result is not None
+        assert result.error is not None
+        assert "fetch" in result.error
+
+    @patch("arxiv_explorer.core.update_checker._run_git")
+    @patch(
+        "arxiv_explorer.core.update_checker._get_tracking_branch",
+        return_value="origin/main",
+    )
+    def test_local_ahead_no_update(self, _mock_track, mock_git, fake_repo):
+        """Local is ahead of remote — no update needed."""
+
+        def git_dispatcher(repo, *args, **kw):
+            return {
+                ("fetch", "origin", "--quiet"): "",
+                ("rev-parse", "HEAD"): "local111",
+                ("rev-parse", "origin/main"): "remote222",
+                ("rev-list", "--left-right", "--count", "HEAD...origin/main"): "3\t0",
+            }.get(args)
+
+        mock_git.side_effect = git_dispatcher
+
+        result = check_for_updates(repo=fake_repo, force=True)
+        assert result is not None
+        assert result.has_update is False
+        assert result.ahead_count == 3
+
+
+# ── pull_updates Tests ───────────────────────────────────────────────
+
+
+class TestPullUpdates:
+    @patch("arxiv_explorer.core.update_checker._get_repo_root", return_value=None)
+    def test_not_a_repo(self, _mock):
+        success, msg = pull_updates(repo=None)
+        assert success is False
+
+    @patch("arxiv_explorer.core.update_checker._run_git")
+    def test_ff_pull_success(self, mock_git, fake_repo):
+        mock_git.return_value = "Fast-forward\n 2 files changed"
+        success, msg = pull_updates(repo=fake_repo)
+        assert success is True
+        assert "Fast-forward" in msg
+
+    @patch("arxiv_explorer.core.update_checker._run_git")
+    def test_ff_fails_normal_succeeds(self, mock_git, fake_repo):
+        mock_git.side_effect = [None, "Merge made by 'ort'"]
+        success, msg = pull_updates(repo=fake_repo)
+        assert success is True
+
+    @patch("arxiv_explorer.core.update_checker._run_git")
+    def test_both_fail(self, mock_git, fake_repo):
+        mock_git.return_value = None
+        success, msg = pull_updates(repo=fake_repo)
+        assert success is False
+        assert "failed" in msg

From 05dc2464ac6df1949b867948a17f830dd329ac37 Mon Sep 17 00:00:00 2001
From: axect <axect.tg@proton.me>
Date: Tue, 7 Apr 2026 15:12:47 +0800
Subject: [PATCH 3/5] fix: resolve ruff lint errors (f-string placeholders,
 import sorting, unused import)

---
 src/arxiv_explorer/cli/main.py            | 4 ++--
 src/arxiv_explorer/core/update_checker.py | 1 -
 tests/test_update_checker.py              | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/arxiv_explorer/cli/main.py b/src/arxiv_explorer/cli/main.py
index 752ebc6..1d52316 100644
--- a/src/arxiv_explorer/cli/main.py
+++ b/src/arxiv_explorer/cli/main.py
@@ -45,8 +45,8 @@ def _prompt_update(status: UpdateStatus) -> None:
     # Warn about conflicts
     if status.conflict_files:
         console.print(
-            f"\n[bold red]Warning:[/bold red] "
-            f"The following locally modified files also changed on remote:"
+            "\n[bold red]Warning:[/bold red] "
+            "The following locally modified files also changed on remote:"
         )
         for f in status.conflict_files:
             console.print(f"  [red]- {f}[/red]")
diff --git a/src/arxiv_explorer/core/update_checker.py b/src/arxiv_explorer/core/update_checker.py
index 1c4665e..20dba59 100644
--- a/src/arxiv_explorer/core/update_checker.py
+++ b/src/arxiv_explorer/core/update_checker.py
@@ -5,7 +5,6 @@
 from dataclasses import dataclass
 from pathlib import Path
 
-
 # Throttle: check at most once per this many seconds
 CHECK_INTERVAL_SECONDS = 12 * 60 * 60  # 12 hours
 
diff --git a/tests/test_update_checker.py b/tests/test_update_checker.py
index df5efb6..6e38830 100644
--- a/tests/test_update_checker.py
+++ b/tests/test_update_checker.py
@@ -2,7 +2,7 @@
 
 import time
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
 import pytest
 

From 5e8195808488152f88e3bb70894b818d46b71693 Mon Sep 17 00:00:00 2001
From: axect <axect.tg@proton.me>
Date: Tue, 7 Apr 2026 15:14:23 +0800
Subject: [PATCH 4/5] style: apply ruff format

---
 src/arxiv_explorer/services/review_service.py |  8 ++------
 tests/test_update_checker.py                  | 15 +++++++++------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/arxiv_explorer/services/review_service.py b/src/arxiv_explorer/services/review_service.py
index 0449955..a437d53 100644
--- a/src/arxiv_explorer/services/review_service.py
+++ b/src/arxiv_explorer/services/review_service.py
@@ -223,9 +223,7 @@ def render_markdown(
         if len(review.authors) > 10:
             author_str += f" (+{len(review.authors) - 10} more)"
 
-        source_label = (
-            "Full text" if review.source_type == "full_text" else "Abstract only"
-        )
+        source_label = "Full text" if review.source_type == "full_text" else "Abstract only"
 
         # Quick Reference Card
         parts.append("| | |")
@@ -1421,9 +1419,7 @@ def _prompt_questions(self, header, full_text_md, **_) -> str:
     def _prompt_reading_guide(self, header, full_text_md, paper_sections, **_) -> str:
         sections_list = ""
         if paper_sections:
-            sections_list = ", ".join(
-                h for h in paper_sections.keys() if h != "_preamble"
-            )
+            sections_list = ", ".join(h for h in paper_sections.keys() if h != "_preamble")
 
         context = full_text_md[:3000] if full_text_md else ""
         context_block = f"Paper structure:\n{context}" if context else ""
diff --git a/tests/test_update_checker.py b/tests/test_update_checker.py
index 6e38830..8166f1c 100644
--- a/tests/test_update_checker.py
+++ b/tests/test_update_checker.py
@@ -98,12 +98,15 @@ def test_returns_none_when_throttled(self, fake_repo):
     def test_force_bypasses_throttle(self, fake_repo):
         """force=True should check even if recently checked."""
         _touch_stamp(fake_repo)
-        with patch(
-            "arxiv_explorer.core.update_checker._get_tracking_branch",
-            return_value=None,
-        ), patch(
-            "arxiv_explorer.core.update_checker._run_git",
-            return_value=None,
+        with (
+            patch(
+                "arxiv_explorer.core.update_checker._get_tracking_branch",
+                return_value=None,
+            ),
+            patch(
+                "arxiv_explorer.core.update_checker._run_git",
+                return_value=None,
+            ),
         ):
             result = check_for_updates(repo=fake_repo, force=True)
             # No tracking branch → returns None after fetch attempt

From c1f9aed0b08d51ce13ebd91b77d009d1286714f9 Mon Sep 17 00:00:00 2001
From: axect <axect.tg@proton.me>
Date: Tue, 7 Apr 2026 15:16:04 +0800
Subject: [PATCH 5/5] style: format arxiv_client.py (inherited from main)

---
 src/arxiv_explorer/services/arxiv_client.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/arxiv_explorer/services/arxiv_client.py b/src/arxiv_explorer/services/arxiv_client.py
index 0552d99..cc3cc2c 100644
--- a/src/arxiv_explorer/services/arxiv_client.py
+++ b/src/arxiv_explorer/services/arxiv_client.py
@@ -40,8 +40,9 @@ def _build_query(query: str) -> str:
         import re
 
         # Already formatted: contains field prefix or boolean operator
-        if re.search(r'\b(all|ti|au|abs|cat|co|jr|rn|id):', query) or \
-           re.search(r'\b(AND|OR|ANDNOT)\b', query):
+        if re.search(r"\b(all|ti|au|abs|cat|co|jr|rn|id):", query) or re.search(
+            r"\b(AND|OR|ANDNOT)\b", query
+        ):
             return query
 
         words = query.split()