diff --git a/src/codespy/agents/reviewer/models.py b/src/codespy/agents/reviewer/models.py index 0ab37ed..a2f6c07 100644 --- a/src/codespy/agents/reviewer/models.py +++ b/src/codespy/agents/reviewer/models.py @@ -72,6 +72,10 @@ class ScopeResult(BaseModel): changed_files: list[ChangedFile] = Field( default_factory=list, description="Changed files belonging to this scope" ) + agentic_contexts: list[str] = Field( + default_factory=list, + description="Detected agentic context file paths (AI agent prompts, instructions, configs) relative to repo root", + ) reason: str = Field(description="Explanation for why this scope was identified") model_config = {"arbitrary_types_allowed": True} diff --git a/src/codespy/agents/reviewer/modules/agentic_extractor.py b/src/codespy/agents/reviewer/modules/agentic_extractor.py new file mode 100644 index 0000000..1322b81 --- /dev/null +++ b/src/codespy/agents/reviewer/modules/agentic_extractor.py @@ -0,0 +1,140 @@ +"""Deterministic agentic context detector — finds AI agent prompts, instructions, and configs.""" + +import logging +from pathlib import Path + +from codespy.tools.filesystem.client import FileSystem +from codespy.tools.filesystem.models import EntryType, TreeNode + +logger = logging.getLogger(__name__) + +# Single files at any depth that indicate agentic contexts (case-insensitive match). +_AGENTIC_SINGLE_FILES: set[str] = { + "claude.md", + "prompt.txt", + "system_prompt.txt", + "instructions.md", + "babyagi.md", + "agent_prompt.md", + "agent_instructions.md", + "task.md", + "memory.md", + "constraints.md", + "ai_settings.json", + "agent_config.yaml", +} + +# Folder-based patterns: directory name → set of allowed extensions. +_AGENTIC_FOLDER_PATTERNS: dict[str, set[str]] = { + "prompts": {".md"}, + "instructions": {".md"}, + "tools": {".md"}, + ".clinerules": {".md"}, + ".rules": {".md"}, + "config": {".json", ".yaml"}, +} + + +def _matches_single_file(name: str) -> bool: + """Check if a filename matches a known agentic single-file pattern.""" + return name.lower() in _AGENTIC_SINGLE_FILES + + +def _is_agentic_folder(dir_name: str) -> set[str] | None: + """Return allowed extensions if dir_name is a known agentic folder, else None.""" + return _AGENTIC_FOLDER_PATTERNS.get(dir_name.lower()) + + +def _collect_folder_files(node: TreeNode, prefix: str, allowed_exts: set[str]) -> list[str]: + """Recursively collect files from an agentic folder that match allowed extensions.""" + paths: list[str] = [] + for child in node.children: + if child.entry_type == EntryType.FILE: + suffix = Path(child.name).suffix.lower() + if suffix in allowed_exts: + paths.append(f"{prefix}{child.name}") + elif child.entry_type == EntryType.DIRECTORY: + # Recurse into subdirectories within the agentic folder + paths.extend(_collect_folder_files(child, f"{prefix}{child.name}/", allowed_exts)) + return paths + + +def _scan_tree(node: TreeNode, prefix: str = "") -> list[str]: + """Recursively scan a tree for agentic context files. + + Detects: + - Single files matching _AGENTIC_SINGLE_FILES at any depth + - Files inside known agentic folders matching _AGENTIC_FOLDER_PATTERNS + """ + paths: list[str] = [] + for child in node.children: + rel = f"{prefix}{child.name}" if prefix else child.name + if child.entry_type == EntryType.DIRECTORY: + allowed_exts = _is_agentic_folder(child.name) + if allowed_exts is not None: + # Collect matching files directly inside this folder + paths.extend(_collect_folder_files(child, f"{rel}/", allowed_exts)) + else: + # Recurse into non-agentic directories + paths.extend(_scan_tree(child, f"{rel}/")) + elif _matches_single_file(child.name): + paths.append(rel) + return paths + + +def detect_agentic_contexts(scope_root: Path) -> list[str]: + """Detect agentic context files in a scope directory. + + Single tree scan at depth 3 to find AI agent prompts, instructions, + and configuration files. + + Args: + scope_root: Absolute path to the scope root directory. + + Returns: + List of relative file paths for detected agentic contexts. + """ + try: + fs = FileSystem(scope_root, create_if_missing=False) + except Exception: + logger.debug(f"Cannot access scope root for agentic detection: {scope_root}") + return [] + + tree = fs.get_tree(max_depth=3, include_hidden=True) + agentic_files = _scan_tree(tree) + + if agentic_files: + logger.info(f"Detected {len(agentic_files)} agentic context(s) in {scope_root}: {agentic_files}") + + return sorted(agentic_files) + + +def extract_agentic_content(scope_root: Path, context_paths: list[str]) -> str: + """Read and concatenate agentic context file contents. + + Args: + scope_root: Absolute path to the scope root directory. + context_paths: List of relative paths to agentic context files. + + Returns: + Concatenated content with ``=== filename ===`` headers, + or empty string if no files or all reads fail. + """ + if not context_paths: + return "" + + try: + fs = FileSystem(scope_root, create_if_missing=False) + except Exception: + logger.debug(f"Cannot access scope root for agentic extraction: {scope_root}") + return "" + + parts: list[str] = [] + for path in context_paths: + try: + content = fs.read_file(path) + parts.append(f"=== {path} ===\n{content.content}") + except Exception as e: # noqa: BLE001 + logger.warning(f"Could not read agentic context {path}: {e}") + + return "\n\n".join(parts) diff --git a/src/codespy/agents/reviewer/modules/code_reviewer.py b/src/codespy/agents/reviewer/modules/code_reviewer.py index 337ccce..c670aad 100644 --- a/src/codespy/agents/reviewer/modules/code_reviewer.py +++ b/src/codespy/agents/reviewer/modules/code_reviewer.py @@ -9,11 +9,13 @@ from codespy.agents import SignatureContext, get_cost_tracker from codespy.agents.reviewer.models import Issue, IssueCategory, ScopeResult +from codespy.agents.reviewer.modules.agentic_extractor import extract_agentic_content from codespy.agents.reviewer.modules.helpers import ( MIN_CONFIDENCE, make_scope_relative, resolve_scope_root, restore_repo_paths, + strip_prefix, ) from codespy.config import get_settings from codespy.tools.mcp_utils import cleanup_mcp_contexts, connect_mcp_server @@ -37,6 +39,12 @@ class CodeReviewSignature(dspy.Signature): Review each changed file's patch. For each file, check ALL categories (A, B, C) before moving to the next file. + AGENTIC CONTEXT: If agentic_context is non-empty, it contains AI agent + instruction/prompt/config files (e.g., claude.md, agent_config.yaml, + .clinerules/*.md) found in this scope. Use this as supporting context for + categories A, B, and C — it reveals the intended agent behavior, constraints, + and tool access patterns that may help you verify or dismiss findings. + TOOLS AVAILABLE: - find_function_definitions: check function signatures and implementations - find_function_calls: understand how functions are called, trace data flow @@ -130,6 +138,11 @@ class CodeReviewSignature(dspy.Signature): categories: list[IssueCategory] = dspy.InputField( desc="Allowed issue categories. Use only these values for the 'category' field on each issue." ) + agentic_context: str = dspy.InputField( + desc="Content of AI agent instruction/prompt/config files found in this scope " + "(e.g., claude.md, agent_config.yaml, .clinerules/*.md). " + "Empty string if none detected. Use as supporting context for all categories." + ) issues: list[Issue] = dspy.OutputField( desc="Verified issues. Category must be one of the provided categories. " @@ -224,14 +237,28 @@ async def aforward( max_iters=max_iters, ) scoped = make_scope_relative(scope) - logger.info( - f" Code review: scope {scope.subroot} " - f"({len(scope.changed_files)} files)" - ) + # Extract agentic context content for this scope + # agentic_contexts are repo-root-relative; strip subroot prefix for scope-relative paths + scope_relative_contexts = [ + strip_prefix(h, scope.subroot) for h in scope.agentic_contexts + ] + agentic_ctx = extract_agentic_content(scope_root, scope_relative_contexts) + if agentic_ctx: + logger.info( + f" Code review: scope {scope.subroot} " + f"({len(scope.changed_files)} files, " + f"{len(scope.agentic_contexts)} agentic contexts)" + ) + else: + logger.info( + f" Code review: scope {scope.subroot} " + f"({len(scope.changed_files)} files)" + ) async with SignatureContext("code_review", self._cost_tracker): result = await agent.acall( scope=scoped, categories=categories, + agentic_context=agentic_ctx, ) issues = [ diff --git a/src/codespy/agents/reviewer/modules/doc_reviewer.py b/src/codespy/agents/reviewer/modules/doc_reviewer.py index b9530ff..7099372 100644 --- a/src/codespy/agents/reviewer/modules/doc_reviewer.py +++ b/src/codespy/agents/reviewer/modules/doc_reviewer.py @@ -9,12 +9,14 @@ from codespy.agents import SignatureContext, get_cost_tracker from codespy.agents.reviewer.models import Issue, IssueCategory, ScopeResult +from codespy.agents.reviewer.modules.agentic_extractor import extract_agentic_content from codespy.agents.reviewer.modules.doc_extractor import extract_documentation from codespy.agents.reviewer.modules.helpers import ( MIN_CONFIDENCE, make_scope_relative, resolve_scope_root, restore_repo_paths, + strip_prefix, ) from codespy.config import get_settings @@ -30,9 +32,12 @@ class DocReviewSignature(dspy.Signature): You are given: 1. Code patches showing what changed 2. Current documentation content (README, .env.example, docs/, etc.) + 3. Optionally, agentic context (AI agent instruction/prompt/config files) Your job: identify documentation that is now WRONG or MISSING because of the code changes. Cross-reference the patches against the documentation. + If agentic_context is non-empty, also check whether code changes affect + AI agent behavior and whether agent instructions/prompts need updating. CHECK FOR: @@ -80,6 +85,10 @@ class DocReviewSignature(dspy.Signature): categories: list[IssueCategory] = dspy.InputField( desc="Allowed issue categories. Use only these values." ) + agentic_context: str = dspy.InputField( + desc="Content of AI agent instruction/prompt/config files found in this scope. " + "Empty string if none detected. Check if code changes make these stale." + ) issues: list[Issue] = dspy.OutputField( desc="Documentation issues. Category must be 'documentation'. " @@ -156,16 +165,29 @@ async def aforward( continue try: reviewer = dspy.ChainOfThought(DocReviewSignature) - logger.info( - f" Doc review: scope {scope.subroot} " - f"({len(scope.changed_files)} files)" - ) + # Extract agentic context content for this scope + scope_relative_contexts = [ + strip_prefix(h, scope.subroot) for h in scope.agentic_contexts + ] + agentic_ctx = extract_agentic_content(scope_root, scope_relative_contexts) + if agentic_ctx: + logger.info( + f" Doc review: scope {scope.subroot} " + f"({len(scope.changed_files)} files, " + f"{len(scope.agentic_contexts)} agentic contexts)" + ) + else: + logger.info( + f" Doc review: scope {scope.subroot} " + f"({len(scope.changed_files)} files)" + ) async with SignatureContext("doc", self._cost_tracker): result = await asyncio.to_thread( reviewer, patches=patches, documentation=documentation, categories=[IssueCategory.DOCUMENTATION], + agentic_context=agentic_ctx, ) issues = [ issue for issue in (result.issues or []) diff --git a/src/codespy/agents/reviewer/modules/scope_identifier.py b/src/codespy/agents/reviewer/modules/scope_identifier.py index 860fbd2..83cf7f6 100644 --- a/src/codespy/agents/reviewer/modules/scope_identifier.py +++ b/src/codespy/agents/reviewer/modules/scope_identifier.py @@ -10,6 +10,7 @@ from codespy.agents import SignatureContext, get_cost_tracker from codespy.agents.reviewer.models import PackageManifest, ScopeResult, ScopeType +from codespy.agents.reviewer.modules.agentic_extractor import detect_agentic_contexts from codespy.config import get_settings from codespy.tools.git.models import ChangedFile, MergeRequest, should_review_file from codespy.tools.mcp_utils import cleanup_mcp_contexts, connect_mcp_server @@ -280,6 +281,18 @@ async def aforward(self, mr: MergeRequest, repo_path: Path, is_local: bool = Fal )] finally: await cleanup_mcp_contexts(contexts) + # Detect agentic contexts in each scope + for scope in scopes: + scope_root = repo_path if scope.subroot == "." else repo_path / scope.subroot + agentic_files = detect_agentic_contexts(scope_root) + if agentic_files: + # Store paths relative to repo root (prefix with subroot) + if scope.subroot == ".": + scope.agentic_contexts = agentic_files + else: + scope.agentic_contexts = [f"{scope.subroot}/{h}" for h in agentic_files] + logger.info(f"Found {len(scope.agentic_contexts)} agentic context(s) for scope '{scope.subroot}': {scope.agentic_contexts}") + # Log results total_files = sum(len(s.changed_files) for s in scopes) logger.info(f"Identified {len(scopes)} scopes covering {total_files} files") diff --git a/src/codespy/agents/reviewer/reviewer.py b/src/codespy/agents/reviewer/reviewer.py index 50f69cd..f2ef849 100644 --- a/src/codespy/agents/reviewer/reviewer.py +++ b/src/codespy/agents/reviewer/reviewer.py @@ -202,6 +202,8 @@ def forward(self, config: ReviewConfig) -> ReviewResult: logger.info(f" Lock file: {manifest.lock_file_path}") if manifest.dependencies_changed: logger.info(f" Dependencies changed: Yes") + if scope.agentic_contexts: + logger.info(f" agentic contexts: {scope.agentic_contexts}") # Run review modules concurrently via asyncio.gather module_names = ["code_reviewer", "doc_reviewer", "supply_chain_auditor"]