Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions src/arxiv_explorer/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from rich.console import Console

from ..core.database import init_db
from ..core.update_checker import UpdateStatus, check_for_updates, pull_updates

app = typer.Typer(
name="axp",
Expand All @@ -22,6 +23,58 @@ def version_callback(value: bool):
raise typer.Exit()


def _prompt_update(status: UpdateStatus) -> None:
    """Display update info, warn about conflicts, and prompt the user to pull.

    Prints how many commits the remote has that the local branch lacks, how
    many commits the local branch is ahead (if any), the number of changed
    files, and every locally modified file that also changed on the remote.
    The user is then asked whether to update; answering ``y`` or ``yes`` runs
    ``pull_updates()`` and reports its outcome.

    If the prompt is interrupted (Ctrl-C) or stdin is closed, the function
    returns without pulling so the command that triggered the check can
    continue.

    Args:
        status: Result of an update check that found new remote commits.
    """
    console.print(
        f"\n[bold yellow]Update available[/bold yellow]: "
        f"{status.behind_count} new commit{'s' if status.behind_count != 1 else ''} "
        f"on remote"
    )

    if status.ahead_count > 0:
        console.print(
            f"[dim](local is also {status.ahead_count} commit{'s' if status.ahead_count != 1 else ''} "
            f"ahead of remote)[/dim]"
        )

    # Show changed files summary
    if status.changed_files:
        n = len(status.changed_files)
        console.print(f"[dim]Changed files: {n}[/dim]")

    # Warn about conflicts
    if status.conflict_files:
        console.print(
            "\n[bold red]Warning:[/bold red] "
            "The following locally modified files also changed on remote:"
        )
        for f in status.conflict_files:
            console.print(f"  [red]- {f}[/red]")
        console.print(
            "[yellow]Pulling may cause merge conflicts. "
            "Consider committing or stashing your local changes first.[/yellow]\n"
        )

    try:
        answer = typer.prompt("Update now? [y/n]", default="n")
    except (typer.Abort, EOFError, KeyboardInterrupt):
        # typer.prompt turns EOF / Ctrl-C into Abort; without catching it a
        # non-interactive run (closed stdin) would abort the whole command
        # just because an update happens to be available.
        console.print()
        return

    if answer.strip().lower() in ("y", "yes"):
        console.print("[dim]Pulling updates...[/dim]")
        success, message = pull_updates()
        if success:
            console.print(f"[green]Updated successfully.[/green] {message}")
            console.print(
                "[yellow]Note: if dependencies changed, run 'uv sync' to update them.[/yellow]\n"
            )
        else:
            console.print(f"[red]Update failed:[/red] {message}\n")
    else:
        console.print("[dim]Skipped.[/dim]\n")


@app.callback()
def main(
version: bool = typer.Option(
Expand All @@ -32,11 +85,23 @@ def main(
is_eager=True,
help="Show version",
),
no_update_check: bool = typer.Option(
False,
"--no-update-check",
hidden=True,
help="Skip update check",
),
):
"""arXiv Explorer - Personalized paper recommendation system."""
# Initialize DB
init_db()

# Check for git updates (throttled, silent on failure)
if not no_update_check:
status = check_for_updates()
if status and status.has_update:
_prompt_update(status)


# Import and register subcommands
from . import config, daily, export, lists, notes, preferences, review, search # noqa: E402
Expand Down
9 changes: 6 additions & 3 deletions src/arxiv_explorer/cli/review.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,16 @@
ReviewSectionType.SECTION_SUMMARIES: "Section Summaries",
ReviewSectionType.METHODOLOGY: "Methodology Analysis",
ReviewSectionType.MATH_FORMULATIONS: "Math Formulations",
ReviewSectionType.FIGURES: "Figure Descriptions",
ReviewSectionType.TABLES: "Table Descriptions",
ReviewSectionType.FIGURES: "Figure Analysis",
ReviewSectionType.TABLES: "Table Analysis",
ReviewSectionType.EXPERIMENTAL_RESULTS: "Experimental Results",
ReviewSectionType.REPRODUCIBILITY: "Reproducibility Assessment",
ReviewSectionType.STRENGTHS_WEAKNESSES: "Strengths & Weaknesses",
ReviewSectionType.IMPACT_SIGNIFICANCE: "Impact & Significance",
ReviewSectionType.RELATED_WORK: "Related Work",
ReviewSectionType.GLOSSARY: "Glossary",
ReviewSectionType.QUESTIONS: "Questions",
ReviewSectionType.QUESTIONS: "Questions for Authors",
ReviewSectionType.READING_GUIDE: "Reading Guide",
}


Expand Down
3 changes: 3 additions & 0 deletions src/arxiv_explorer/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,13 @@ class ReviewSectionType(str, Enum):
FIGURES = "figures"
TABLES = "tables"
EXPERIMENTAL_RESULTS = "experimental_results"
REPRODUCIBILITY = "reproducibility"
STRENGTHS_WEAKNESSES = "strengths_weaknesses"
IMPACT_SIGNIFICANCE = "impact_significance"
RELATED_WORK = "related_work"
GLOSSARY = "glossary"
QUESTIONS = "questions"
READING_GUIDE = "reading_guide"


@dataclass
Expand Down
195 changes: 195 additions & 0 deletions src/arxiv_explorer/core/update_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
"""Git-based update checker with throttling and conflict detection."""

import subprocess
import time
from dataclasses import dataclass
from pathlib import Path

# Minimum number of seconds between two update checks (12 hours).
CHECK_INTERVAL_SECONDS = 12 * 3600

# Upper bound, in seconds, on how long a single git command may run.
GIT_TIMEOUT_SECONDS = 10


@dataclass
class UpdateStatus:
"""Result of an update check."""

has_update: bool = False
local_ref: str = ""
remote_ref: str = ""
behind_count: int = 0
ahead_count: int = 0
changed_files: list[str] | None = None # files changed on remote
conflict_files: list[str] | None = None # locally modified files that remote also changed
error: str | None = None


def _get_repo_root() -> Path | None:
"""Find the git repo root from the package's installed location."""
# Walk up from this file to find .git
current = Path(__file__).resolve().parent
for _ in range(10):
if (current / ".git").exists():
return current
parent = current.parent
if parent == current:
break
current = parent
return None


def _run_git(repo: Path, *args: str, timeout: int = GIT_TIMEOUT_SECONDS) -> str | None:
    """Invoke ``git -C <repo> <args...>`` and capture its output.

    Returns the whitespace-stripped stdout when git exits with status 0.
    Returns ``None`` when git exits non-zero, exceeds ``timeout`` seconds,
    or cannot be started at all.
    """
    command = ["git", "-C", str(repo)]
    command.extend(args)
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return None
    if completed.returncode != 0:
        return None
    return completed.stdout.strip()


def _get_stamp_path(repo: Path) -> Path:
"""Path to the last-check timestamp file."""
return repo / ".git" / "axp_update_check"


def _should_check(repo: Path) -> bool:
    """Decide whether the throttle interval has elapsed since the last check.

    A missing, unreadable, or unparseable stamp file counts as "never
    checked", so the answer is ``True`` in those cases.
    """
    stamp_file = _get_stamp_path(repo)
    if stamp_file.exists():
        try:
            recorded = float(stamp_file.read_text().strip())
        except (ValueError, OSError):
            return True
        elapsed = time.time() - recorded
        return elapsed >= CHECK_INTERVAL_SECONDS
    return True


def _touch_stamp(repo: Path) -> None:
    """Persist the current epoch time as the moment of the latest check.

    Best effort: a failure to write the stamp file is ignored.
    """
    stamp_file = _get_stamp_path(repo)
    now_text = str(time.time())
    try:
        stamp_file.write_text(now_text)
    except OSError:
        pass


def _get_tracking_branch(repo: Path) -> str | None:
    """Return the upstream of the checked-out branch (e.g. 'origin/main').

    Returns ``None`` when either the current branch name or its upstream
    cannot be resolved by git.
    """
    current_branch = _run_git(repo, "rev-parse", "--abbrev-ref", "HEAD")
    if current_branch:
        upstream_spec = f"{current_branch}@{{upstream}}"
        return _run_git(repo, "rev-parse", "--abbrev-ref", upstream_spec)
    return None


def check_for_updates(repo: Path | None = None, force: bool = False) -> UpdateStatus | None:
    """Fetch from the tracked remote and report whether new commits exist.

    Args:
        repo: Repository root; when ``None`` it is located via ``_get_repo_root()``.
        force: When true, bypass the time-based throttle.

    Returns:
        ``None`` when no check was performed (no repository found, throttled,
        or no tracking branch). Otherwise an ``UpdateStatus``; a failed fetch
        yields one whose ``error`` is set, and ``has_update`` is true only
        when the local branch is behind its upstream.
    """
    if repo is None:
        repo = _get_repo_root()
        if repo is None:
            return None

    throttled = not force and not _should_check(repo)
    if throttled:
        return None

    tracking = _get_tracking_branch(repo)
    if not tracking:
        _touch_stamp(repo)
        return None

    # "origin/main" -> "origin"; fall back to "origin" when there is no slash.
    remote, slash, _ = tracking.partition("/")
    if not slash:
        remote = "origin"

    # Fetch only (no merge). The stamp is written whether or not it succeeds.
    fetched = _run_git(repo, "fetch", remote, "--quiet")
    _touch_stamp(repo)
    if fetched is None:
        return UpdateStatus(error="fetch failed (network issue?)")

    head_sha = _run_git(repo, "rev-parse", "HEAD") or ""
    upstream_sha = _run_git(repo, "rev-parse", tracking) or ""
    if head_sha == upstream_sha:
        return UpdateStatus(local_ref=head_sha, remote_ref=upstream_sha)

    # Left count = commits only on HEAD (ahead), right count = only on upstream (behind).
    symmetric_range = f"HEAD...{tracking}"
    ahead = behind = 0
    counts = _run_git(repo, "rev-list", "--left-right", "--count", symmetric_range)
    if counts:
        fields = counts.split()
        if len(fields) == 2:
            ahead, behind = (int(field) for field in fields)

    if behind == 0:
        # Nothing to pull: local is ahead of, or level with, the upstream.
        return UpdateStatus(local_ref=head_sha, remote_ref=upstream_sha, ahead_count=ahead)

    # Files the upstream changed relative to the merge base.
    remote_diff = _run_git(repo, "diff", "--name-only", symmetric_range)
    changed_files = remote_diff.splitlines() if remote_diff else []

    # Local paths that a pull could collide with: unstaged edits, staged
    # edits, and untracked (non-ignored) files.
    local_dirty: set[str] = set()
    for git_args in (
        ("diff", "--name-only"),
        ("diff", "--name-only", "--cached"),
        ("ls-files", "--others", "--exclude-standard"),
    ):
        listing = _run_git(repo, *git_args)
        if listing:
            local_dirty.update(listing.splitlines())

    overlapping = sorted(local_dirty.intersection(changed_files))

    return UpdateStatus(
        has_update=True,
        local_ref=head_sha,
        remote_ref=upstream_sha,
        behind_count=behind,
        ahead_count=ahead,
        changed_files=changed_files,
        conflict_files=overlapping or None,
    )


def pull_updates(repo: Path | None = None) -> tuple[bool, str]:
    """Pull from the upstream and report the outcome as ``(success, message)``.

    Args:
        repo: Repository root; when ``None`` it is located via ``_get_repo_root()``.

    Returns:
        ``(True, <git output>)`` when a pull succeeds, otherwise
        ``(False, <explanation>)``.
    """
    target = _get_repo_root() if repo is None else repo
    if target is None:
        return False, "Not a git repository"

    # Try a fast-forward-only pull first, then fall back to a regular pull.
    for extra_args in (("--ff-only",), ()):
        output = _run_git(target, "pull", *extra_args, timeout=30)
        if output is not None:
            return True, output

    return False, "git pull failed — you may need to resolve conflicts manually"
5 changes: 3 additions & 2 deletions src/arxiv_explorer/services/arxiv_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ def _build_query(query: str) -> str:
import re

# Already formatted: contains field prefix or boolean operator
if re.search(r'\b(all|ti|au|abs|cat|co|jr|rn|id):', query) or \
re.search(r'\b(AND|OR|ANDNOT)\b', query):
if re.search(r"\b(all|ti|au|abs|cat|co|jr|rn|id):", query) or re.search(
r"\b(AND|OR|ANDNOT)\b", query
):
return query

words = query.split()
Expand Down
Loading
Loading