diff --git a/.env.example b/.env.example index 437651b..9b7345a 100644 --- a/.env.example +++ b/.env.example @@ -92,8 +92,8 @@ DEFAULT_MODEL=anthropic/claude-opus-4-6 # ============================================================================= # codespy uses a tiered model approach to balance review quality and cost: # -# Smart (DEFAULT_MODEL): Core analysis — defect detection, supply chain, -# scope identification, doc review. Requires strong reasoning. +# Smart (DEFAULT_MODEL): Core analysis — code review (bugs, security, smells), +# supply chain, scope identification, doc review. Requires strong reasoning. # Recommended: anthropic/claude-opus-4-6 or equivalent. # # Mid-tier (EXTRACTION_MODEL + DEDUPLICATION_MODEL): Structured field @@ -156,12 +156,12 @@ DEFAULT_MODEL=anthropic/claude-opus-4-6 # Format: SIGNATURE_NAME_SETTING=value # # Available signatures: -# - DEFECT_DETECTION (bugs, logic errors, security vulnerabilities) -# - SUPPLY_CHAIN (supply chain security analysis) -# - DOC_REVIEW (documentation review) -# - SCOPE_IDENTIFICATION (code scope detection) -# - DEDUPLICATION (issue deduplication) -# - SUMMARIZATION (PR summary generation) +# - CODE_REVIEW (bugs, security, removed defensive code, code smells) +# - DOC (compares patches against documentation) +# - SCOPE (code scope detection) +# - SUPPLY_CHAIN (supply chain security analysis) +# - DEDUPLICATION (issue deduplication) +# - SUMMARIZATION (PR summary generation) # # Available settings per signature: # - ENABLED (true/false) @@ -172,20 +172,20 @@ DEFAULT_MODEL=anthropic/claude-opus-4-6 # - TEMPERATURE (float) - Lower = more deterministic output # Examples: -# DEFECT_DETECTION_ENABLED=true -# DEFECT_DETECTION_MAX_ITERS=10 -# DEFECT_DETECTION_MODEL=anthropic/claude-sonnet-4-5-20250929 -# DEFECT_DETECTION_MAX_REASONING_TOKENS=512 -# DEFECT_DETECTION_TEMPERATURE=0.1 +# CODE_REVIEW_ENABLED=true +# CODE_REVIEW_MAX_ITERS=10 +# CODE_REVIEW_MODEL=anthropic/claude-sonnet-4-5-20250929 +# CODE_REVIEW_MAX_REASONING_TOKENS=512 +# CODE_REVIEW_TEMPERATURE=0.1 # SUPPLY_CHAIN_ENABLED=true -# DOC_REVIEW_ENABLED=true -# DOC_REVIEW_MODEL=anthropic/claude-sonnet-4-5-20250929 +# DOC_ENABLED=true +# DOC_MODEL=anthropic/claude-sonnet-4-5-20250929 -# SCOPE_IDENTIFICATION_ENABLED=true -# SCOPE_IDENTIFICATION_MAX_ITERS=10 -# SCOPE_IDENTIFICATION_MAX_REASONING_TOKENS=1024 +# SCOPE_ENABLED=true +# SCOPE_MAX_ITERS=10 +# SCOPE_MAX_REASONING_TOKENS=1024 # DEDUPLICATION_ENABLED=true # DEDUPLICATION_MODEL=anthropic/claude-sonnet-4-5-20250929 diff --git a/.github/workflows/codespy-review.yml.example b/.github/workflows/codespy-review.yml.example index 90a3387..9d88a3a 100644 --- a/.github/workflows/codespy-review.yml.example +++ b/.github/workflows/codespy-review.yml.example @@ -120,7 +120,7 @@ jobs: # default-temperature: '0.1' # # # Per-signature overrides -# code-and-doc-review-max-iters: '10' +# code-review-max-iters: '10' # # # Disable specific signatures # supply-chain-enabled: 'false' diff --git a/README.md b/README.md index 7d78ea4..56b6faf 100644 --- a/README.md +++ b/README.md @@ -486,7 +486,7 @@ export DEFAULT_MODEL=anthropic/claude-opus-4-6 export DEFAULT_MAX_ITERS=20 # Per-signature settings (use signature name, not module name) -export CODE_AND_DOC_REVIEW_MODEL=anthropic/claude-sonnet-4-5-20250929 +export CODE_REVIEW_MODEL=anthropic/claude-sonnet-4-5-20250929 # Output settings export OUTPUT_STDOUT=false @@ -642,10 +642,12 @@ output_git: true │ │ │ │ ┌──────────────────────────▼─────────────────────────────────┐ │ │ │ Parallel Review Modules │ │ -│ │ ┌──────────────┐ ┌──────────────────────────────────┐ │ │ -│ │ │Supply Chain │ │ Code & Doc Reviewer │ │ │ -│ │ │ Auditor │ │ (defects + documentation) │ │ │ -│ │ └──────────────┘ └──────────────────────────────────┘ │ │ +│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────┐ │ │ +│ │ │ Supply Chain │ │ Code │ │ Doc │ │ │ +│ │ │ Auditor │ │ Reviewer │ │ Reviewer │ │ │ +│ │ │ │ │ (bug+sec+ │ │ │ │ │ +│ │ │ │ │ smell) │ │ │ │ │ +│ │ └──────────────┘ └──────────────┘ └──────────┘ │ │ │ └──────────────────────────┬─────────────────────────────────┘ │ │ │ │ │ ┌──────────────────────────▼─────────────────────────────────┐ │ @@ -688,8 +690,9 @@ The review is powered by DSPy signatures that structure the LLM's analysis: | Signature | Config Key | Description | |-----------|------------|-------------| -| **ScopeIdentifierSignature** | `scope_identification` | Identifies code scopes (frontend, backend, infra, microservice in mono repo, etc...) | -| **CodeAndDocReviewSignature** | `code_and_doc_review` | Detects verified bugs, security vulnerabilities, and stale/wrong documentation in a single pass | +| **ScopeIdentifierSignature** | `scope` | Identifies code scopes (frontend, backend, infra, microservice in mono repo, etc...) | +| **CodeReviewSignature** | `code_review` | Detects verified bugs, security vulnerabilities, removed defensive code, and code smells | +| **DocReviewSignature** | `doc` | Detects stale or wrong documentation caused by code changes | | **SupplyChainSecuritySignature** | `supply_chain` | Analyzes artifacts (Dockerfiles) and dependencies for supply chain security | | **IssueDeduplicationSignature** | `deduplication` | LLM-powered deduplication of issues across reviewers | | **MRSummarySignature** | `summarization` | Generates summary, quality assessment, and recommendation | diff --git a/action.yml b/action.yml index abb7d82..c94993f 100644 --- a/action.yml +++ b/action.yml @@ -99,59 +99,87 @@ inputs: default: 'true' # ========================================== - # SIGNATURE: scope_identification + # SIGNATURE: scope # ========================================== - scope-identification-enabled: + scope-enabled: description: 'Enable scope identification signature' required: false default: 'true' - scope-identification-model: + scope-model: description: 'Model for scope identification (empty = use default)' required: false - scope-identification-max-iters: + scope-max-iters: description: 'Max iterations for scope identification' required: false - scope-identification-max-context-size: + scope-max-context-size: description: 'Max context size for scope identification' required: false - scope-identification-max-reasoning-tokens: + scope-max-reasoning-tokens: description: 'Max reasoning tokens for scope identification' required: false - scope-identification-temperature: + scope-temperature: description: 'Temperature for scope identification' required: false # ========================================== - # SIGNATURE: code_and_doc_review + # SIGNATURE: code_review # ========================================== - code-and-doc-review-enabled: - description: 'Enable code and doc review signature (bugs, security vulnerabilities, and documentation)' + code-review-enabled: + description: 'Enable code review signature (bugs, security vulnerabilities, code smells)' required: false default: 'true' - code-and-doc-review-model: - description: 'Model for code and doc review (empty = use default)' + code-review-model: + description: 'Model for code review (empty = use default)' required: false - code-and-doc-review-max-iters: - description: 'Max iterations for code and doc review' + code-review-max-iters: + description: 'Max iterations for code review' required: false - code-and-doc-review-max-context-size: - description: 'Max context size for code and doc review' + code-review-max-context-size: + description: 'Max context size for code review' required: false - code-and-doc-review-max-reasoning-tokens: - description: 'Max reasoning tokens for code and doc review' + code-review-max-reasoning-tokens: + description: 'Max reasoning tokens for code review' required: false - code-and-doc-review-temperature: - description: 'Temperature for code and doc review' + code-review-temperature: + description: 'Temperature for code review' + required: false + + # ========================================== + # SIGNATURE: doc + # ========================================== + doc-enabled: + description: 'Enable documentation review signature (stale/wrong documentation)' + required: false + default: 'true' + + doc-model: + description: 'Model for doc review (empty = use default)' + required: false + + doc-max-iters: + description: 'Max iterations for doc review' + required: false + + doc-max-context-size: + description: 'Max context size for doc review' + required: false + + doc-max-reasoning-tokens: + description: 'Max reasoning tokens for doc review' + required: false + + doc-temperature: + description: 'Temperature for doc review' required: false # ========================================== @@ -317,20 +345,28 @@ runs: ENABLE_PROMPT_CACHING: ${{ inputs.enable-prompt-caching }} # Scope identification signature - SCOPE_IDENTIFICATION_ENABLED: ${{ inputs.scope-identification-enabled }} - SCOPE_IDENTIFICATION_MODEL: ${{ inputs.scope-identification-model }} - SCOPE_IDENTIFICATION_MAX_ITERS: ${{ inputs.scope-identification-max-iters }} - SCOPE_IDENTIFICATION_MAX_CONTEXT_SIZE: ${{ inputs.scope-identification-max-context-size }} - SCOPE_IDENTIFICATION_MAX_REASONING_TOKENS: ${{ inputs.scope-identification-max-reasoning-tokens }} - SCOPE_IDENTIFICATION_TEMPERATURE: ${{ inputs.scope-identification-temperature }} + SCOPE_ENABLED: ${{ inputs.scope-enabled }} + SCOPE_MODEL: ${{ inputs.scope-model }} + SCOPE_MAX_ITERS: ${{ inputs.scope-max-iters }} + SCOPE_MAX_CONTEXT_SIZE: ${{ inputs.scope-max-context-size }} + SCOPE_MAX_REASONING_TOKENS: ${{ inputs.scope-max-reasoning-tokens }} + SCOPE_TEMPERATURE: ${{ inputs.scope-temperature }} - # Code and doc review signature - CODE_AND_DOC_REVIEW_ENABLED: ${{ inputs.code-and-doc-review-enabled }} - CODE_AND_DOC_REVIEW_MODEL: ${{ inputs.code-and-doc-review-model }} - CODE_AND_DOC_REVIEW_MAX_ITERS: ${{ inputs.code-and-doc-review-max-iters }} - CODE_AND_DOC_REVIEW_MAX_CONTEXT_SIZE: ${{ inputs.code-and-doc-review-max-context-size }} - CODE_AND_DOC_REVIEW_MAX_REASONING_TOKENS: ${{ inputs.code-and-doc-review-max-reasoning-tokens }} - CODE_AND_DOC_REVIEW_TEMPERATURE: ${{ inputs.code-and-doc-review-temperature }} + # Code review signature + CODE_REVIEW_ENABLED: ${{ inputs.code-review-enabled }} + CODE_REVIEW_MODEL: ${{ inputs.code-review-model }} + CODE_REVIEW_MAX_ITERS: ${{ inputs.code-review-max-iters }} + CODE_REVIEW_MAX_CONTEXT_SIZE: ${{ inputs.code-review-max-context-size }} + CODE_REVIEW_MAX_REASONING_TOKENS: ${{ inputs.code-review-max-reasoning-tokens }} + CODE_REVIEW_TEMPERATURE: ${{ inputs.code-review-temperature }} + + # Doc review signature + DOC_ENABLED: ${{ inputs.doc-enabled }} + DOC_MODEL: ${{ inputs.doc-model }} + DOC_MAX_ITERS: ${{ inputs.doc-max-iters }} + DOC_MAX_CONTEXT_SIZE: ${{ inputs.doc-max-context-size }} + DOC_MAX_REASONING_TOKENS: ${{ inputs.doc-max-reasoning-tokens }} + DOC_TEMPERATURE: ${{ inputs.doc-temperature }} # Supply chain signature SUPPLY_CHAIN_ENABLED: ${{ inputs.supply-chain-enabled }} @@ -385,20 +421,28 @@ runs: [ -n "$ENABLE_PROMPT_CACHING" ] && DOCKER_ARGS="$DOCKER_ARGS -e ENABLE_PROMPT_CACHING" # Scope identification - [ -n "$SCOPE_IDENTIFICATION_ENABLED" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_IDENTIFICATION_ENABLED" - [ -n "$SCOPE_IDENTIFICATION_MODEL" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_IDENTIFICATION_MODEL" - [ -n "$SCOPE_IDENTIFICATION_MAX_ITERS" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_IDENTIFICATION_MAX_ITERS" - [ -n "$SCOPE_IDENTIFICATION_MAX_CONTEXT_SIZE" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_IDENTIFICATION_MAX_CONTEXT_SIZE" - [ -n "$SCOPE_IDENTIFICATION_MAX_REASONING_TOKENS" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_IDENTIFICATION_MAX_REASONING_TOKENS" - [ -n "$SCOPE_IDENTIFICATION_TEMPERATURE" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_IDENTIFICATION_TEMPERATURE" + [ -n "$SCOPE_ENABLED" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_ENABLED" + [ -n "$SCOPE_MODEL" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_MODEL" + [ -n "$SCOPE_MAX_ITERS" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_MAX_ITERS" + [ -n "$SCOPE_MAX_CONTEXT_SIZE" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_MAX_CONTEXT_SIZE" + [ -n "$SCOPE_MAX_REASONING_TOKENS" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_MAX_REASONING_TOKENS" + [ -n "$SCOPE_TEMPERATURE" ] && DOCKER_ARGS="$DOCKER_ARGS -e SCOPE_TEMPERATURE" + + # Code review + [ -n "$CODE_REVIEW_ENABLED" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_REVIEW_ENABLED" + [ -n "$CODE_REVIEW_MODEL" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_REVIEW_MODEL" + [ -n "$CODE_REVIEW_MAX_ITERS" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_REVIEW_MAX_ITERS" + [ -n "$CODE_REVIEW_MAX_CONTEXT_SIZE" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_REVIEW_MAX_CONTEXT_SIZE" + [ -n "$CODE_REVIEW_MAX_REASONING_TOKENS" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_REVIEW_MAX_REASONING_TOKENS" + [ -n "$CODE_REVIEW_TEMPERATURE" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_REVIEW_TEMPERATURE" - # Code and doc review - [ -n "$CODE_AND_DOC_REVIEW_ENABLED" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_AND_DOC_REVIEW_ENABLED" - [ -n "$CODE_AND_DOC_REVIEW_MODEL" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_AND_DOC_REVIEW_MODEL" - [ -n "$CODE_AND_DOC_REVIEW_MAX_ITERS" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_AND_DOC_REVIEW_MAX_ITERS" - [ -n "$CODE_AND_DOC_REVIEW_MAX_CONTEXT_SIZE" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_AND_DOC_REVIEW_MAX_CONTEXT_SIZE" - [ -n "$CODE_AND_DOC_REVIEW_MAX_REASONING_TOKENS" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_AND_DOC_REVIEW_MAX_REASONING_TOKENS" - [ -n "$CODE_AND_DOC_REVIEW_TEMPERATURE" ] && DOCKER_ARGS="$DOCKER_ARGS -e CODE_AND_DOC_REVIEW_TEMPERATURE" + # Doc review + [ -n "$DOC_ENABLED" ] && DOCKER_ARGS="$DOCKER_ARGS -e DOC_ENABLED" + [ -n "$DOC_MODEL" ] && DOCKER_ARGS="$DOCKER_ARGS -e DOC_MODEL" + [ -n "$DOC_MAX_ITERS" ] && DOCKER_ARGS="$DOCKER_ARGS -e DOC_MAX_ITERS" + [ -n "$DOC_MAX_CONTEXT_SIZE" ] && DOCKER_ARGS="$DOCKER_ARGS -e DOC_MAX_CONTEXT_SIZE" + [ -n "$DOC_MAX_REASONING_TOKENS" ] && DOCKER_ARGS="$DOCKER_ARGS -e DOC_MAX_REASONING_TOKENS" + [ -n "$DOC_TEMPERATURE" ] && DOCKER_ARGS="$DOCKER_ARGS -e DOC_TEMPERATURE" # Supply chain [ -n "$SUPPLY_CHAIN_ENABLED" ] && DOCKER_ARGS="$DOCKER_ARGS -e SUPPLY_CHAIN_ENABLED" diff --git a/codespy.yaml b/codespy.yaml index 2b60e0d..2f5af0c 100644 --- a/codespy.yaml +++ b/codespy.yaml @@ -5,8 +5,8 @@ # - Flat values: (e.g., OUTPUT_FORMAT=json, DEFAULT_MODEL=openai/gpt-5) # - Signature values: _ # Examples: -# CODE_AND_DOC_REVIEW_MAX_ITERS=15 -# CODE_AND_DOC_REVIEW_ENABLED=false +# CODE_REVIEW_MAX_ITERS=15 +# CODE_REVIEW_ENABLED=false # SUPPLY_CHAIN_MODEL=openai/gpt-4-turbo # # Priority: Environment Variables > YAML Config > Defaults @@ -67,8 +67,8 @@ gitlab: # ============================================================================ # codespy uses a tiered model approach to balance review quality and cost: # -# Smart (default_model): Used for core analysis — defect detection, supply -# chain, scope identification, doc review. Requires strong reasoning. +# Smart (default_model): Used for core analysis — code review (bugs, security, +# smells), supply chain, scope identification, doc review. Requires strong reasoning. # Recommended: anthropic/claude-opus-4-6 or equivalent. # # Mid-tier (extraction_model + deduplication): Used for structured field @@ -93,9 +93,9 @@ gitlab: # These apply to all signatures unless overridden per-signature default_model: anthropic/claude-opus-4-6 # DEFAULT_MODEL extraction_model: null # EXTRACTION_MODEL (falls back to default_model) -default_max_iters: 10 # DEFAULT_MAX_ITERS -default_max_context_size: 50000 # DEFAULT_MAX_CONTEXT_SIZE -default_max_reasoning_tokens: 4000 # DEFAULT_MAX_REASONING_TOKENS (limits LLM reasoning verbosity) +default_max_iters: 20 # DEFAULT_MAX_ITERS +default_max_context_size: 100000 # DEFAULT_MAX_CONTEXT_SIZE +default_max_reasoning_tokens: 6000 # DEFAULT_MAX_REASONING_TOKENS (limits LLM reasoning verbosity) default_temperature: 0 # DEFAULT_TEMPERATURE (lower = more deterministic output) # Global LLM reliability settings @@ -115,23 +115,33 @@ signatures: # When true: scans ALL artifacts (Dockerfiles, etc.) and manifests # When false (default): only scans artifacts/manifests that were modified in the MR - # Code & Doc Reviewer signature (bugs, logic errors, security vulnerabilities, and documentation) - code_and_doc_review: - enabled: true # CODE_AND_DOC_REVIEW_ENABLED - max_iters: null # CODE_AND_DOC_REVIEW_MAX_ITERS - model: null # CODE_AND_DOC_REVIEW_MODEL - max_context_size: null # CODE_AND_DOC_REVIEW_MAX_CONTEXT_SIZE - max_reasoning_tokens: null # CODE_AND_DOC_REVIEW_MAX_REASONING_TOKENS - temperature: null # CODE_AND_DOC_REVIEW_TEMPERATURE + # Code Reviewer signature (bugs, security, removed defensive code, code smells) + # Unified code review: bugs, security vulnerabilities, and code smells in a single agent pass per scope + code_review: + enabled: true # CODE_REVIEW_ENABLED + max_iters: null # CODE_REVIEW_MAX_ITERS + model: null # CODE_REVIEW_MODEL + max_context_size: null # CODE_REVIEW_MAX_CONTEXT_SIZE + max_reasoning_tokens: null # CODE_REVIEW_MAX_REASONING_TOKENS + temperature: null # CODE_REVIEW_TEMPERATURE + + # Documentation Reviewer signature (compares patches against extracted documentation) + # Note: doc extraction is now deterministic (no LLM) — see doc_extractor.py + doc: + enabled: true # DOC_ENABLED + model: null # DOC_MODEL + max_context_size: null # DOC_MAX_CONTEXT_SIZE + max_reasoning_tokens: null # DOC_MAX_REASONING_TOKENS + temperature: null # DOC_TEMPERATURE # Scope Identifier signature - scope_identification: - enabled: true # SCOPE_IDENTIFICATION_ENABLED - max_iters: null # SCOPE_IDENTIFICATION_MAX_ITERS - model: null # SCOPE_IDENTIFICATION_MODEL (Sonnet 4.5) - max_context_size: null # SCOPE_IDENTIFICATION_MAX_CONTEXT_SIZE - max_reasoning_tokens: null # SCOPE_IDENTIFICATION_MAX_REASONING_TOKENS - temperature: null # SCOPE_IDENTIFICATION_TEMPERATURE + scope: + enabled: true # SCOPE_ENABLED + max_iters: null # SCOPE_MAX_ITERS + model: null # SCOPE_MODEL + max_context_size: null # SCOPE_MAX_CONTEXT_SIZE + max_reasoning_tokens: null # SCOPE_MAX_REASONING_TOKENS + temperature: null # SCOPE_TEMPERATURE # Deduplicator signature deduplication: diff --git a/pyproject.toml b/pyproject.toml index 074f321..33521eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "codespy-ai" -version = "0.3.0" +version = "0.3.1" description = "Code review agent powered by DSPy" readme = "README.md" license = "MIT" diff --git a/src/codespy/__init__.py b/src/codespy/__init__.py index 13aefb3..d750a56 100644 --- a/src/codespy/__init__.py +++ b/src/codespy/__init__.py @@ -1,3 +1,3 @@ """codespy - Code review agent powered by DSPy.""" -__version__ = "0.3.0" +__version__ = "0.3.1" diff --git a/src/codespy/agents/cost_tracker.py b/src/codespy/agents/cost_tracker.py index 20b5e26..a8350fc 100644 --- a/src/codespy/agents/cost_tracker.py +++ b/src/codespy/agents/cost_tracker.py @@ -206,8 +206,8 @@ class SignatureContext: 3. Summing costs/tokens from new entries Usage: - with SignatureContext("code_and_doc_review", cost_tracker): - # All LLM calls here will be attributed to code_and_doc_review + with SignatureContext("code_review", cost_tracker): + # All LLM calls here will be attributed to code_review result = await agent.acall(...) """ diff --git a/src/codespy/agents/reviewer/models.py b/src/codespy/agents/reviewer/models.py index 6e4fa0f..7c2654a 100644 --- a/src/codespy/agents/reviewer/models.py +++ b/src/codespy/agents/reviewer/models.py @@ -23,6 +23,7 @@ class IssueCategory(str, Enum): SECURITY = "security" BUG = "bug" DOCUMENTATION = "documentation" + SMELL = "smell" class ScopeType(str, Enum): @@ -107,7 +108,7 @@ def location(self) -> str: class SignatureStatsResult(BaseModel): """Statistics for a single signature's execution during review.""" - name: str = Field(description="Signature name (e.g., code_and_doc_review, supply_chain)") + name: str = Field(description="Signature name (e.g., code_review, doc, scope, supply_chain, deduplication, summarization)") cost: float = Field(default=0.0, description="Cost in USD for this signature") tokens: int = Field(default=0, description="Tokens used by this signature") call_count: int = Field(default=0, description="Number of LLM calls made by this signature") @@ -183,6 +184,11 @@ def documentation_issues(self) -> list[Issue]: """Get all documentation issues.""" return [i for i in self.issues if i.category == IssueCategory.DOCUMENTATION] + @property + def smell_issues(self) -> list[Issue]: + """Get all code smell issues.""" + return [i for i in self.issues if i.category == IssueCategory.SMELL] + def issues_by_severity(self) -> dict[IssueSeverity, list[Issue]]: """Group issues by severity.""" result: dict[IssueSeverity, list[Issue]] = {s: [] for s in IssueSeverity} @@ -218,6 +224,7 @@ def to_markdown(self) -> str: f"- **Security:** {len(self.security_issues)}", f"- **Bugs:** {len(self.bug_issues)}", f"- **Documentation:** {len(self.documentation_issues)}", + f"- **Smells:** {len(self.smell_issues)}", "", ]) diff --git a/src/codespy/agents/reviewer/modules/__init__.py b/src/codespy/agents/reviewer/modules/__init__.py index 7033513..ff655cf 100644 --- a/src/codespy/agents/reviewer/modules/__init__.py +++ b/src/codespy/agents/reviewer/modules/__init__.py @@ -1,12 +1,14 @@ """DSPy modules for code review.""" -from codespy.agents.reviewer.modules.code_and_doc_reviewer import CodeAndDocReviewer +from codespy.agents.reviewer.modules.code_reviewer import CodeReviewer from codespy.agents.reviewer.modules.deduplicator import IssueDeduplicator +from codespy.agents.reviewer.modules.doc_reviewer import DocReviewer from codespy.agents.reviewer.modules.scope_identifier import ScopeIdentifier from codespy.agents.reviewer.modules.supply_chain_auditor import SupplyChainAuditor __all__ = [ - "CodeAndDocReviewer", + "CodeReviewer", + "DocReviewer", "IssueDeduplicator", "ScopeIdentifier", "SupplyChainAuditor", diff --git a/src/codespy/agents/reviewer/modules/code_and_doc_reviewer.py b/src/codespy/agents/reviewer/modules/code_and_doc_reviewer.py deleted file mode 100644 index ec0ad15..0000000 --- a/src/codespy/agents/reviewer/modules/code_and_doc_reviewer.py +++ /dev/null @@ -1,291 +0,0 @@ -"""Merged code defect detection and documentation review module.""" - -import asyncio -import logging -from pathlib import Path -from typing import Any, Sequence - -import dspy # type: ignore[import-untyped] - -from codespy.agents import SignatureContext, get_cost_tracker -from codespy.agents.reviewer.models import Issue, IssueCategory, ScopeResult -from codespy.agents.reviewer.modules.helpers import MIN_CONFIDENCE, make_scope_relative, resolve_scope_root, restore_repo_paths -from codespy.config import get_settings -from codespy.tools.mcp_utils import cleanup_mcp_contexts, connect_mcp_server - -logger = logging.getLogger(__name__) - - -class CodeAndDocReviewSignature(dspy.Signature): - """Detect VERIFIED code defects AND stale/wrong documentation in a scope. - - You are a busy Principal Engineer with very little time. - Be extremely terse. Use imperative mood ("Fix X", not "You should fix X"). - You have tools to explore the scope's filesystem, search for text, and analyze code. - All file paths are relative to the scope root directory (the current tool root). - Tools are restricted to this scope — you cannot access files outside it. - - ═══════════════════════════════════════════════════════════════════════════════ - PHASE 1 — READ DOCUMENTATION (MANDATORY FIRST STEP) - ═══════════════════════════════════════════════════════════════════════════════ - - Use read_file to read the README at the scope root: - - Path: readme.md OR README.md (paths are relative to scope root) - - This file provides essential context about the scope's purpose, API contracts, - configuration, and expected behavior. Use it to inform BOTH documentation - review AND defect detection. - - If README doesn't exist at scope root, search for alternative documentation files - (e.g., docs/, documentation/, README.rst) using get_tree or file_exists. - - ═══════════════════════════════════════════════════════════════════════════════ - PHASE 2 — ANALYZE CHANGES FOR DEFECTS AND DOCUMENTATION ISSUES - ═══════════════════════════════════════════════════════════════════════════════ - - Using the README context and the patches in scope.changed_files, look for: - - ── A. CODE DEFECTS (category = "bug" or "security") ────────────────────────── - - CRITICAL RULES: - - ONLY report defects you can VERIFY using the available tools - - Before reporting any defect, USE the tools to verify your assumptions - - DO NOT speculate about potential issues you cannot verify - - DO NOT report "might be", "could be", "possibly", "may cause" issues - - If you cannot point to the EXACT defective code with evidence, do NOT report it - - Quality over quantity: prefer 0 reports over 1 speculative report - - VERIFICATION WORKFLOW: - 1. Review each changed file's patch — the diff shows what changed - 2. For suspected defects that need verification beyond the patch: - - Use find_function_definitions to check function signatures and implementations - - Use find_function_calls to understand how functions are called and trace data flow - - Use find_function_usages/find_callers to trace usage patterns - - Use search_literal to find related patterns (sanitization, encoding, validation) - - Use read_file ONLY if you need broader context not visible in the patch - 3. Only report issues that are CONFIRMED by your verification - - BUGS & LOGIC ERRORS (category = "bug"): - - Logic errors: verify the condition is actually incorrect by checking related code - - Null/undefined references: verify the check is actually missing - - Resource leaks: verify there's no cleanup in finally/defer/close methods - - Error handling: verify errors aren't handled elsewhere in the call chain - - Type mismatches: verify types by checking definitions - - Off-by-one errors: verify by understanding the data structure bounds - - SECURITY VULNERABILITIES (category = "security"): - - Injection attacks (SQL, command, XSS): verify input reaches dangerous sink without sanitization - - Authentication/authorization issues: verify auth check is actually missing - - Sensitive data exposure: verify data is actually exposed, not just accessed - - Insecure cryptographic practices: verify the actual algorithm used - - Security misconfigurations: verify by checking actual config values - - Input validation issues: verify input is not validated - - Path traversal: verify path input is not sanitized - - Race conditions: verify shared state access without synchronization - - Memory safety issues: verify unsafe memory operations - - DO NOT report: - - Style issues or minor improvements - - Hypothetical vulnerabilities without evidence - - "Could be vulnerable if..." scenarios - - ── B. DOCUMENTATION ISSUES (category = "documentation") ────────────────────── - - Verify documentation against the specific changes in this scope: - - HTTP/API CHANGES (CRITICAL — high miss rate): - - Content-Type changes → check documented examples - - HTTP status code changes → update all references - - Response body structure changes → verify documented examples match - - New response fields → ensure documented - - Search docs for endpoint paths to find all examples - - FUNCTION/METHOD SIGNATURE CHANGES: - - Parameters added/removed/renamed → check if docs reference old signatures - - Return type changes → update all examples - - New public functions → ensure documented if scope has doc conventions - - CONFIGURATION & ENVIRONMENT VARIABLES: - - New config fields → check README Configuration section or .env.example - - Removed/renamed fields → search docs for old field names - - Default value changes → verify docs reflect new defaults - - ERROR TYPES & CODES: - - New/removed error types → check error documentation - - Error behavior changes (error → success) → BREAKING, must document - - HTTP status code semantics change → update API docs - - DATA MODELS & STRUCTS: - - New/removed fields in request/response structs → update API examples - - Field type changes → update examples - - CLI COMMANDS & FLAGS: - - New commands/flags → add to CLI reference - - Removed/renamed flags → search docs for old names - - ═══════════════════════════════════════════════════════════════════════════════ - OUTPUT RULES - ═══════════════════════════════════════════════════════════════════════════════ - - - Set category to one of the values provided in the categories input - - For security issues, include cwe_id where applicable - - Reference files by name and line number only — never copy source code into issues - - Do not repeat patch content in reasoning steps. Keep each reasoning step to 1-2 sentences - - Empty list if no issues found. No approval text ("LGTM", "looks good") - - description: ≤25 words, imperative tone, no filler ("Fix X", "Update Y section") - - No polite or conversational language ("I suggest", "Please consider", "Great") - - Do not populate code_snippet — use line numbers instead - """ - - scope: ScopeResult = dspy.InputField( - desc="Scope with changed files. Has: subroot, scope_type, " - "changed_files (filename + patch - analyze patch first), language, package_manifest. " - "File paths in changed_files are relative to the scope root (tool root)." - ) - categories: list[IssueCategory] = dspy.InputField( - desc="Allowed issue categories. Use only these values for the 'category' field on each issue." - ) - - issues: list[Issue] = dspy.OutputField( - desc="Verified defects and documentation issues. Category must be one of the provided categories. " - "Titles <10 words. Descriptions ≤25 words, imperative. Empty list if none. " - "File paths must be relative to scope root." - ) - - -class CodeAndDocReviewer(dspy.Module): - """Detects code defects and documentation issues in a single agentic pass. - - Merges the responsibilities of DefectDetector and DocumentationReviewer - to share MCP tool overhead and README context across both concerns. - - MCP tools are scope-restricted: for each scope, tools are rooted at - repo_path/scope.subroot so the agent cannot access files outside the scope. - """ - - def __init__(self) -> None: - """Initialize the code and doc reviewer.""" - super().__init__() - self._cost_tracker = get_cost_tracker() - self._settings = get_settings() - - async def _create_mcp_tools(self, scope_root: Path) -> tuple[list[Any], list[Any]]: - """Create DSPy tools from MCP servers, rooted at scope directory. - - Args: - scope_root: Path to the scope root directory (repo_path / scope.subroot) - - Returns: - Tuple of (tools list, contexts list for cleanup) - """ - tools: list[Any] = [] - contexts: list[Any] = [] - tools_dir = Path(__file__).parent.parent.parent.parent / "tools" - scope_root_str = str(scope_root) - caller = "code_and_doc_reviewer" - # Filesystem tools: read_file, list_directory, get_tree, file_exists, get_file_info - tools.extend( - await connect_mcp_server( - tools_dir / "filesystem" / "server.py", [scope_root_str], contexts, caller - ) - ) - # Ripgrep tools: search_literal, find_function_usages, find_type_usages, etc. - tools.extend( - await connect_mcp_server( - tools_dir / "parsers" / "ripgrep" / "server.py", [scope_root_str], contexts, caller - ) - ) - # Treesitter tools: find_function_definitions, find_function_calls, etc. - tools.extend( - await connect_mcp_server( - tools_dir / "parsers" / "treesitter" / "server.py", [scope_root_str], contexts, caller - ) - ) - return tools, contexts - - async def aforward( - self, scopes: Sequence[ScopeResult], repo_path: Path - ) -> list[Issue]: - """Analyze scopes for code defects and documentation issues. - - For each scope, MCP tools are created rooted at repo_path/scope.subroot - so the agent can only access files within the scope boundary. This prevents - unnecessary out-of-scope tool calls that waste tokens and cost. - - Args: - scopes: List of identified scopes with their changed files - repo_path: Path to the cloned repository - - Returns: - List of issues (bugs, security, documentation) found across all scopes - """ - if not self._settings.is_signature_enabled("code_and_doc_review"): - logger.debug("Skipping code_and_doc_review: disabled") - return [] - - changed_scopes = [s for s in scopes if s.has_changes and s.changed_files] - if not changed_scopes: - logger.info("No scopes with changes to review") - return [] - - all_issues: list[Issue] = [] - max_iters = self._settings.get_max_iters("code_and_doc_review") - - total_files = sum(len(s.changed_files) for s in changed_scopes) - logger.info( - f"Reviewing code and docs for {len(changed_scopes)} scopes " - f"({total_files} changed files)..." - ) - - for scope in changed_scopes: - # Scope-restrict MCP tools to the scope's subroot directory - scope_root = resolve_scope_root(repo_path, scope.subroot) - tools, contexts = await self._create_mcp_tools(scope_root) - try: - agent = dspy.ReAct( - signature=CodeAndDocReviewSignature, - tools=tools, - max_iters=max_iters, - ) - # Create scope-relative copy so file paths match the scoped tool root - scoped = make_scope_relative(scope) - logger.info( - f" Reviewing scope {scope.subroot} " - f"({len(scope.changed_files)} files)" - ) - async with SignatureContext("code_and_doc_review", self._cost_tracker): - result = await agent.acall( - scope=scoped, - categories=[IssueCategory.BUG, IssueCategory.SECURITY, IssueCategory.DOCUMENTATION], - ) - - issues = [ - issue for issue in (result.issues or []) - if issue.confidence >= MIN_CONFIDENCE - ] - # Restore repo-root-relative paths in reported issues - restore_repo_paths(issues, scope.subroot) - all_issues.extend(issues) - logger.debug( - f" Scope {scope.subroot}: {len(issues)} issues" - ) - except Exception as e: - logger.error(f"Review failed for scope {scope.subroot}: {e}") - finally: - await cleanup_mcp_contexts(contexts) - - logger.info(f"Code and doc review found {len(all_issues)} issues") - return all_issues - - def forward( - self, scopes: Sequence[ScopeResult], repo_path: Path - ) -> list[Issue]: - """Analyze scopes for defects and documentation issues (sync wrapper). - - Args: - scopes: List of identified scopes with their changed files - repo_path: Path to the cloned repository - - Returns: - List of issues found across all scopes - """ - return asyncio.run(self.aforward(scopes, repo_path)) \ No newline at end of file diff --git a/src/codespy/agents/reviewer/modules/code_reviewer.py b/src/codespy/agents/reviewer/modules/code_reviewer.py new file mode 100644 index 0000000..337ccce --- /dev/null +++ b/src/codespy/agents/reviewer/modules/code_reviewer.py @@ -0,0 +1,266 @@ +"""Unified code review module — defects, security, and code smells.""" + +import asyncio +import logging +from pathlib import Path +from typing import Any, Sequence + +import dspy # type: ignore[import-untyped] + +from codespy.agents import SignatureContext, get_cost_tracker +from codespy.agents.reviewer.models import Issue, IssueCategory, ScopeResult +from codespy.agents.reviewer.modules.helpers import ( + MIN_CONFIDENCE, + make_scope_relative, + resolve_scope_root, + restore_repo_paths, +) +from codespy.config import get_settings +from codespy.tools.mcp_utils import cleanup_mcp_contexts, connect_mcp_server + +logger = logging.getLogger(__name__) + + +class CodeReviewSignature(dspy.Signature): + """Detect VERIFIED code defects, security vulnerabilities, and code smells. + + You are a busy Principal Engineer with very little time. + Be extremely terse. Use imperative mood ("Fix X", not "You should fix X"). + You have tools to explore the scope's filesystem, search for text, and analyze code. + All file paths are relative to the scope root directory (the current tool root). + Tools are restricted to this scope — you cannot access files outside it. + + ═══════════════════════════════════════════════════════════════════════════════ + ANALYZE CHANGES + ═══════════════════════════════════════════════════════════════════════════════ + + Review each changed file's patch. For each file, check ALL categories (A, B, C) + before moving to the next file. + + TOOLS AVAILABLE: + - find_function_definitions: check function signatures and implementations + - find_function_calls: understand how functions are called, trace data flow + - find_function_usages / find_callers: trace usage patterns + - search_literal: find related patterns (sanitization, validation, naming) + - read_file:broader context not visible in the patch (use sparingly) + + ─── A. BUGS & LOGIC ERRORS (category = "bug") ─────────────────────────────── + + EVIDENCE STANDARD: STRICT — you must VERIFY with tools before reporting. + DO NOT speculate. No "might be", "could be", "possibly" issues. + If you cannot point to the EXACT defective code with evidence, do NOT report. + + - Logic errors: verify the condition is actually incorrect by checking related code + - Null/undefined references: verify the check is actually missing + - Resource leaks: verify there's no cleanup in finally/defer/close methods + - Error handling: verify errors aren't handled elsewhere in the call chain + - Type mismatches: verify types by checking definitions + - Off-by-one errors: verify by understanding the data structure bounds + + REMOVED DEFENSIVE CODE (always report as "bug"): + If a guard (bounds check, null check, Math.max/min, default value, try/catch, + sanitization) existed in removed lines (-) and is absent from added lines (+), + report it. Use read_file or search_literal to confirm the guard was not relocated + elsewhere in the same function/file. If it was moved, do not report. + + ─── B. SECURITY VULNERABILITIES (category = "security") ───────────────────── + + EVIDENCE STANDARD: STRICT — you must VERIFY with tools before reporting. + DO NOT speculate. Trace data flow from source to sink. + + - Injection (SQL, command, XSS): verify input reaches dangerous sink unsanitized + - Authentication/authorization: verify auth check is actually missing + - Sensitive data exposure: verify data is actually exposed, not just accessed + - Insecure crypto: verify the actual algorithm used + - Path traversal: verify path input is not sanitized + - Race conditions: verify shared state access without synchronization + + ─── C. CODE SMELLS (category = "smell") ────────────────────────────────────── + + EVIDENCE STANDARD: OBSERVATIONAL — the patch or function signature IS the evidence. + A function with 5 primitive parameters IS a smell — no further verification needed. + A variable named "data" IS a smell — the name itself is the evidence. + Use tools to confirm if needed (e.g., find_function_definitions for param lists), + but the structural pattern visible in the patch is sufficient to report. + + UNCOMMUNICATIVE NAMES: + - Variables not describing data they hold (data, info, item, temp, val) + - Functions not starting with a verb (user(), process()) + - Booleans not reading as predicates (valid, flag → isValid, hasPermission) + - Side-effect mismatch: getName() that writes to DB + + PRIMITIVE OBSESSION: + - Functions taking 3+ related primitive parameters that travel together + Example: (string zip, string city, string street) → suggest Address struct/class + - Use find_function_definitions to inspect parameter lists if not visible in patch + + COMPLEXITY: + - Double negatives, nested ternaries, >2 logical operators per condition + - Magic numbers in logic → suggest named constants + - 3+ nesting levels → suggest guard clauses/early returns + - 5+ switch/if-elif branches → suggest polymorphism + + YAGNI: + - Abstract class/interface with single implementation (verify with find_callers) + - Unused parameters accepted "for future use" + + DO NOT report as smells: + - Idiomatic short names (i, j, k, err, ctx, db, tx) + - Test file naming conventions + + ═══════════════════════════════════════════════════════════════════════════════ + OUTPUT RULES + ═══════════════════════════════════════════════════════════════════════════════ + + - Set category to one of the values provided in the categories input + - For security issues, include cwe_id where applicable + - Reference files by name and line number only — never copy source code into issues + - Do not repeat patch content in reasoning steps. Keep each step to 1-2 sentences + - Empty list if no issues found. No approval text ("LGTM", "looks good") + - description: ≤25 words, imperative tone, no filler ("Fix X", "Rename Y to Z") + - No polite or conversational language + - Do not populate code_snippet — use line numbers instead + """ + + scope: ScopeResult = dspy.InputField( + desc="Scope with changed files. Has: subroot, scope_type, " + "changed_files (filename + patch - analyze patch first), language, package_manifest. " + "File paths in changed_files are relative to the scope root (tool root)." + ) + categories: list[IssueCategory] = dspy.InputField( + desc="Allowed issue categories. Use only these values for the 'category' field on each issue." + ) + + issues: list[Issue] = dspy.OutputField( + desc="Verified issues. Category must be one of the provided categories. " + "Titles <10 words. Descriptions ≤25 words, imperative. Empty list if none. " + "File paths must be relative to scope root." + ) + + +class CodeReviewer(dspy.Module): + """Unified code reviewer — defects, security, and smells in a single pass. + + Merges DefectDetector and SmellDetector into one agent to avoid redundant + README reads, tool sessions, and input token costs per scope. + + MCP tools are scope-restricted: for each scope, tools are rooted at + repo_path/scope.subroot so the agent cannot access files outside the scope. + """ + + def __init__(self) -> None: + """Initialize the code reviewer.""" + super().__init__() + self._cost_tracker = get_cost_tracker() + self._settings = get_settings() + + async def _create_tools( + self, scope_root: Path + ) -> tuple[list[Any], list[Any]]: + """Create scope-restricted tools: filesystem + ripgrep + treesitter.""" + tools: list[Any] = [] + contexts: list[Any] = [] + tools_dir = Path(__file__).parent.parent.parent.parent / "tools" + scope_root_str = str(scope_root) + caller = "code_reviewer" + + tools.extend(await connect_mcp_server( + tools_dir / "filesystem" / "server.py", + [scope_root_str], contexts, caller, + )) + tools.extend(await connect_mcp_server( + tools_dir / "parsers" / "ripgrep" / "server.py", + [scope_root_str], contexts, caller, + )) + tools.extend(await connect_mcp_server( + tools_dir / "parsers" / "treesitter" / "server.py", + [scope_root_str], contexts, caller, + )) + return tools, contexts + + async def aforward( + self, scopes: Sequence[ScopeResult], repo_path: Path + ) -> list[Issue]: + """Analyze scopes for defects, security issues, and code smells. + + Args: + scopes: List of identified scopes with their changed files + repo_path: Path to the cloned repository + + Returns: + List of bug, security, and smell issues found across all scopes + """ + if not self._settings.is_signature_enabled("code_review"): + logger.debug("Skipping code_review: disabled") + return [] + + # Determine which categories are active + categories: list[IssueCategory] = [] + categories.append(IssueCategory.BUG) + categories.append(IssueCategory.SECURITY) + categories.append(IssueCategory.SMELL) + + changed_scopes = [s for s in scopes if s.has_changes and s.changed_files] + if not changed_scopes: + logger.info("No scopes with changes for code review") + return [] + + all_issues: list[Issue] = [] + max_iters = self._settings.get_max_iters("code_review") + + total_files = sum(len(s.changed_files) for s in changed_scopes) + logger.info( + f"Code review for {len(changed_scopes)} scopes " + f"({total_files} changed files)..." + ) + + for scope in changed_scopes: + scope_root = resolve_scope_root(repo_path, scope.subroot) + tools, contexts = await self._create_tools(scope_root) + try: + agent = dspy.ReAct( + signature=CodeReviewSignature, + tools=tools, + max_iters=max_iters, + ) + scoped = make_scope_relative(scope) + logger.info( + f" Code review: scope {scope.subroot} " + f"({len(scope.changed_files)} files)" + ) + async with SignatureContext("code_review", self._cost_tracker): + result = await agent.acall( + scope=scoped, + categories=categories, + ) + + issues = [ + issue for issue in (result.issues or []) + if issue.confidence >= MIN_CONFIDENCE + ] + restore_repo_paths(issues, scope.subroot) + all_issues.extend(issues) + logger.debug( + f" Scope {scope.subroot}: {len(issues)} code review issues" + ) + except Exception as e: + logger.error(f"Code review failed for scope {scope.subroot}: {e}") + finally: + await cleanup_mcp_contexts(contexts) + + logger.info(f"Code review found {len(all_issues)} issues") + return all_issues + + def forward( + self, scopes: Sequence[ScopeResult], repo_path: Path + ) -> list[Issue]: + """Analyze scopes for code issues (sync wrapper). + + Args: + scopes: List of identified scopes with their changed files + repo_path: Path to the cloned repository + + Returns: + List of bug, security, and smell issues found across all scopes + """ + return asyncio.run(self.aforward(scopes, repo_path)) diff --git a/src/codespy/agents/reviewer/modules/doc_extractor.py b/src/codespy/agents/reviewer/modules/doc_extractor.py new file mode 100644 index 0000000..fd4640e --- /dev/null +++ b/src/codespy/agents/reviewer/modules/doc_extractor.py @@ -0,0 +1,80 @@ +"""Deterministic documentation extractor — single tree scan, no LLM.""" + +import logging +import re +from pathlib import Path + +from codespy.tools.filesystem.client import FileSystem +from codespy.tools.filesystem.models import EntryType, TreeNode + +logger = logging.getLogger(__name__) + +# Filename patterns recognised as documentation (case-insensitive). +_DOC_FILE_RE = re.compile( + r"^(readme|changelog|contributing|\.env\.example)", + re.IGNORECASE, +) +# Top-level directory names whose *entire* contents count as docs. +_DOC_DIRS = {"docs", "documentation"} + + +def _collect_all_files(node: TreeNode, prefix: str) -> list[str]: + """Recursively collect all file paths under a tree node.""" + paths: list[str] = [] + for child in node.children: + rel = f"{prefix}{child.name}" + if child.entry_type == EntryType.DIRECTORY: + paths.extend(_collect_all_files(child, f"{rel}/")) + else: + paths.append(rel) + return paths + + +def _collect_doc_paths(node: TreeNode, prefix: str = "") -> list[str]: + """Walk a shallow tree and return relative paths of doc files. + + Matches: + - Root-level files: README*, CHANGELOG*, CONTRIBUTING*, .env.example + - Everything under docs/ or documentation/ directories + """ + paths: list[str] = [] + for child in node.children: + rel = f"{prefix}{child.name}" if prefix else child.name + if child.entry_type == EntryType.DIRECTORY: + if child.name.lower() in _DOC_DIRS: + paths.extend(_collect_all_files(child, f"{rel}/")) + else: + # Recurse into non-doc subdirs (depth-2 tree already limits this). + paths.extend(_collect_doc_paths(child, f"{rel}/")) + elif _DOC_FILE_RE.match(child.name): + paths.append(rel) + return paths + + +def extract_documentation(scope_root: Path) -> str: + """Extract documentation content from a scope directory. + + Single tree scan at depth 2, fast-fail if no doc files found. + + Returns: + Concatenated documentation with ``=== filename ===`` headers, + or empty string if no documentation exists. + """ + fs = FileSystem(scope_root, create_if_missing=False) + + # One tree scan — depth 2 covers root files + immediate subdirs. + tree = fs.get_tree(max_depth=2) + doc_paths = _collect_doc_paths(tree) + + if not doc_paths: + return "" + + parts: list[str] = [] + for path in doc_paths: + try: + content = fs.read_file(path) + parts.append(f"=== {path} ===\n{content.content}") + except Exception as e: # noqa: BLE001 + logger.warning(f"Could not read doc file {path}: {e}") + + return "\n\n".join(parts) diff --git a/src/codespy/agents/reviewer/modules/doc_reviewer.py b/src/codespy/agents/reviewer/modules/doc_reviewer.py new file mode 100644 index 0000000..b9530ff --- /dev/null +++ b/src/codespy/agents/reviewer/modules/doc_reviewer.py @@ -0,0 +1,197 @@ +"""Documentation review module — detects stale or wrong documentation.""" + +import asyncio +import logging +from pathlib import Path +from typing import Sequence + +import dspy # type: ignore[import-untyped] + +from codespy.agents import SignatureContext, get_cost_tracker +from codespy.agents.reviewer.models import Issue, IssueCategory, ScopeResult +from codespy.agents.reviewer.modules.doc_extractor import extract_documentation +from codespy.agents.reviewer.modules.helpers import ( + MIN_CONFIDENCE, + make_scope_relative, + resolve_scope_root, + restore_repo_paths, +) +from codespy.config import get_settings + +logger = logging.getLogger(__name__) + + +class DocReviewSignature(dspy.Signature): + """Detect stale or wrong documentation caused by code changes. + + You are a busy Principal Engineer. Be extremely terse. + Use imperative mood ("Update X", not "You should update X"). + + You are given: + 1. Code patches showing what changed + 2. Current documentation content (README, .env.example, docs/, etc.) + + Your job: identify documentation that is now WRONG or MISSING because of the + code changes. Cross-reference the patches against the documentation. + + CHECK FOR: + + HTTP/API CHANGES: + - Content-Type, status codes, response body changes → check documented examples + - New/removed endpoints → update docs + + FUNCTION/METHOD SIGNATURE CHANGES: + - Parameters added/removed/renamed → check if docs reference old signatures + - Return type changes → update examples + + CONFIGURATION & ENVIRONMENT VARIABLES: + - New config fields → check README Configuration section or .env.example + - Removed/renamed fields → find old names in documentation + - Default value changes → verify docs reflect new defaults + + CLI COMMANDS & FLAGS: + - New commands/flags → add to CLI reference + - Removed/renamed flags → find old names in documentation + + DATA MODELS: + - New/removed fields in structs → update API examples + + DO NOT report: + - Missing documentation for internal/private functions + - Style preferences in documentation + - Documentation that is correct but could be "better" + - Issues unrelated to the code changes in the patches + + OUTPUT RULES: + - Set category to "documentation" + - description: ≤25 words, imperative tone ("Update X section", "Add Y to README") + - Empty list if documentation is up to date. No approval text ("LGTM", "looks good") + - No polite or conversational language + """ + + patches: str = dspy.InputField( + desc="Code patches (diffs) showing what changed in this scope. " + "Each patch is prefixed with the filename." + ) + documentation: str = dspy.InputField( + desc="Current documentation content for this scope. " + "Each file is prefixed with === filename ===." + ) + categories: list[IssueCategory] = dspy.InputField( + desc="Allowed issue categories. Use only these values." + ) + + issues: list[Issue] = dspy.OutputField( + desc="Documentation issues. Category must be 'documentation'. " + "Titles <10 words. Descriptions ≤25 words, imperative. Empty list if none." + ) + + +class DocReviewer(dspy.Module): + """Detects stale or wrong documentation caused by code changes. + + Two-step approach per scope: + 1. Deterministic extraction (single tree scan + file reads, no LLM) + 2. DocReview (ChainOfThought, no tools): compares patches vs doc content + """ + + def __init__(self) -> None: + """Initialize the doc reviewer.""" + super().__init__() + self._cost_tracker = get_cost_tracker() + self._settings = get_settings() + + def _build_patches(self, scope: ScopeResult) -> str: + """Build compact patches representation.""" + parts: list[str] = [] + for f in scope.changed_files: + if f.patch: + parts.append(f"--- {f.filename} ---\n{f.patch}") + return "\n\n".join(parts) + + async def aforward( + self, scopes: Sequence[ScopeResult], repo_path: Path + ) -> list[Issue]: + """Analyze scopes for documentation issues. + + Args: + scopes: List of identified scopes with their changed files + repo_path: Path to the cloned repository + + Returns: + List of documentation issues found across all scopes + """ + if not self._settings.is_signature_enabled("doc"): + logger.debug("Skipping doc: disabled") + return [] + changed_scopes = [s for s in scopes if s.has_changes and s.changed_files] + if not changed_scopes: + logger.info("No scopes with changes for doc review") + return [] + all_issues: list[Issue] = [] + total_files = sum(len(s.changed_files) for s in changed_scopes) + logger.info( + f"Doc review for {len(changed_scopes)} scopes " + f"({total_files} changed files)..." + ) + for scope in changed_scopes: + scope_root = resolve_scope_root(repo_path, scope.subroot) + # Step 1: Extract documentation (deterministic — no LLM) + logger.info(f" Doc extraction: scope {scope.subroot}") + try: + documentation = extract_documentation(scope_root) + except Exception as e: + logger.error(f"Doc extraction failed for scope {scope.subroot}: {e}") + documentation = "" + if not documentation.strip(): + logger.debug( + f" No documentation found in {scope.subroot}, skipping doc review" + ) + continue + # Step 2: Review docs vs patches (ChainOfThought, no tools) + scoped = make_scope_relative(scope) + patches = self._build_patches(scoped) + if not patches: + logger.debug(f" No patches in {scope.subroot}, skipping doc review") + continue + try: + reviewer = dspy.ChainOfThought(DocReviewSignature) + logger.info( + f" Doc review: scope {scope.subroot} " + f"({len(scope.changed_files)} files)" + ) + async with SignatureContext("doc", self._cost_tracker): + result = await asyncio.to_thread( + reviewer, + patches=patches, + documentation=documentation, + categories=[IssueCategory.DOCUMENTATION], + ) + issues = [ + issue for issue in (result.issues or []) + if issue.confidence >= MIN_CONFIDENCE + ] + restore_repo_paths(issues, scope.subroot) + all_issues.extend(issues) + logger.debug( + f" Scope {scope.subroot}: {len(issues)} doc issues" + ) + except Exception as e: + logger.error(f"Doc review failed for scope {scope.subroot}: {e}") + + logger.info(f"Doc review found {len(all_issues)} issues") + return all_issues + + def forward( + self, scopes: Sequence[ScopeResult], repo_path: Path + ) -> list[Issue]: + """Analyze scopes for documentation issues (sync wrapper). + + Args: + scopes: List of identified scopes with their changed files + repo_path: Path to the cloned repository + + Returns: + List of documentation issues found across all scopes + """ + return asyncio.run(self.aforward(scopes, repo_path)) diff --git a/src/codespy/agents/reviewer/modules/helpers.py b/src/codespy/agents/reviewer/modules/helpers.py index 5b622c1..5e71b3a 100644 --- a/src/codespy/agents/reviewer/modules/helpers.py +++ b/src/codespy/agents/reviewer/modules/helpers.py @@ -122,7 +122,6 @@ def make_scope_relative(scope: ScopeResult) -> ScopeResult: if scope.subroot == ".": return scope # Already at repo root, no transformation needed - relative_files = [ ChangedFile( filename=strip_prefix(f.filename, scope.subroot), @@ -138,7 +137,6 @@ def make_scope_relative(scope: ScopeResult) -> ScopeResult: ) for f in scope.changed_files ] - # Adjust manifest paths too manifest = None if scope.package_manifest: @@ -152,7 +150,6 @@ def make_scope_relative(scope: ScopeResult) -> ScopeResult: package_manager=scope.package_manifest.package_manager, dependencies_changed=scope.package_manifest.dependencies_changed, ) - return SR( subroot=".", scope_type=scope.scope_type, @@ -178,12 +175,7 @@ def restore_repo_paths(issues: list[Issue], subroot: str) -> None: """ if subroot == "." or not subroot: return - prefix = subroot.rstrip("/") + "/" for issue in issues: if issue.filename and not issue.filename.startswith(prefix): issue.filename = prefix + issue.filename - - - - diff --git a/src/codespy/agents/reviewer/modules/scope_identifier.py b/src/codespy/agents/reviewer/modules/scope_identifier.py index 6912177..7e0f148 100644 --- a/src/codespy/agents/reviewer/modules/scope_identifier.py +++ b/src/codespy/agents/reviewer/modules/scope_identifier.py @@ -198,8 +198,8 @@ async def aforward(self, mr: MergeRequest, repo_path: Path) -> list[ScopeResult] return [] # Check if signature is enabled - if not self._settings.is_signature_enabled("scope_identification"): - logger.warning("scope_identification is disabled - using fallback single scope") + if not self._settings.is_signature_enabled("scope"): + logger.warning("scope is disabled - using fallback single scope") return [ScopeResult( subroot=".", scope_type=ScopeType.APPLICATION, @@ -217,9 +217,9 @@ async def aforward(self, mr: MergeRequest, repo_path: Path) -> list[ScopeResult] changed_files_map: dict[str, ChangedFile] = {f.filename: f for f in reviewable_files} try: # Get per-signature config - max_iters = self._settings.get_max_iters("scope_identification") - temperature = self._settings.get_temperature("scope_identification") - max_reasoning = self._settings.get_max_reasoning_tokens("scope_identification") + max_iters = self._settings.get_max_iters("scope") + temperature = self._settings.get_temperature("scope") + max_reasoning = self._settings.get_max_reasoning_tokens("scope") # Create ReAct agent agent = dspy.ReAct( @@ -228,8 +228,8 @@ async def aforward(self, mr: MergeRequest, repo_path: Path) -> list[ScopeResult] max_iters=max_iters, ) logger.info(f"Identifying scopes for {len(changed_file_paths)} changed files...") - # Track scope_identification signature costs - async with SignatureContext("scope_identification", self._cost_tracker): + # Track scope signature costs + async with SignatureContext("scope", self._cost_tracker): result = await agent.acall( changed_files=changed_file_paths, repo_owner=mr.repo_owner, diff --git a/src/codespy/agents/reviewer/reviewer.py b/src/codespy/agents/reviewer/reviewer.py index 095dad6..9c3ead2 100644 --- a/src/codespy/agents/reviewer/reviewer.py +++ b/src/codespy/agents/reviewer/reviewer.py @@ -19,7 +19,8 @@ LocalReviewConfig, ) from codespy.agents.reviewer.modules import ( - CodeAndDocReviewer, + CodeReviewer, + DocReviewer, IssueDeduplicator, ScopeIdentifier, SupplyChainAuditor, @@ -75,8 +76,9 @@ def __init__(self, settings: Settings | None = None) -> None: # Initialize all modules - they internally check if their signatures are enabled self.scope_identifier = ScopeIdentifier() + self.code_reviewer = CodeReviewer() + self.doc_reviewer = DocReviewer() self.supply_chain_auditor = SupplyChainAuditor() - self.code_and_doc_reviewer = CodeAndDocReviewer() self.deduplicator = IssueDeduplicator() def _verify_model_access(self) -> None: @@ -130,7 +132,8 @@ async def _run_review_modules( Aggregated list of issues from all modules """ tasks = [ - self.code_and_doc_reviewer.aforward(scopes=scopes, repo_path=repo_path), + self.code_reviewer.aforward(scopes=scopes, repo_path=repo_path), + self.doc_reviewer.aforward(scopes=scopes, repo_path=repo_path), self.supply_chain_auditor.aforward(scopes=scopes, repo_path=repo_path), ] results = await asyncio.gather(*tasks, return_exceptions=True) @@ -202,7 +205,7 @@ def forward(self, config: ReviewConfig) -> ReviewResult: logger.info(f" Dependencies changed: Yes") # Run review modules concurrently via asyncio.gather - module_names = ["code_and_doc_reviewer", "supply_chain_auditor"] + module_names = ["code_reviewer", "doc_reviewer", "supply_chain_auditor"] logger.info(f"Running review modules concurrently: {', '.join(module_names)}...") all_issues = asyncio.run( self._run_review_modules(scopes, repo_path, module_names) diff --git a/src/codespy/config_dspy.py b/src/codespy/config_dspy.py index b667124..c792b5c 100644 --- a/src/codespy/config_dspy.py +++ b/src/codespy/config_dspy.py @@ -23,14 +23,15 @@ class SignatureConfig(BaseModel): # Known signature names for env var routing SIGNATURE_NAMES = { - "code_and_doc_review", + "code_review", + "doc", + "scope", "supply_chain", - "scope_identification", "deduplication", "summarization", } -# Create uppercase prefixes for matching (e.g., "CODE_AND_DOC_REVIEW_") +# Create uppercase prefixes for matching (e.g., "CODE_REVIEW_", "SUPPLY_CHAIN_") SIGNATURE_PREFIXES = {name.upper() + "_": name for name in SIGNATURE_NAMES} # Known signature settings for validation @@ -59,7 +60,7 @@ def apply_signature_env_overrides(config: dict[str, Any]) -> dict[str, Any]: """Apply environment variable overrides to config for signature settings. Handles signature settings with pattern: - - CODE_AND_DOC_REVIEW_MAX_ITERS -> signatures.code_and_doc_review.max_iters + - CODE_REVIEW_MAX_ITERS -> signatures.code_review.max_iters - SUPPLY_CHAIN_ENABLED -> signatures.supply_chain.enabled Top-level settings (DEFAULT_MODEL, AWS_REGION, etc.) are handled directly