Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/codespy/agents/reviewer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ class ScopeResult(BaseModel):
changed_files: list[ChangedFile] = Field(
default_factory=list, description="Changed files belonging to this scope"
)
agentic_contexts: list[str] = Field(
default_factory=list,
description="Detected agentic context file paths (AI agent prompts, instructions, configs) relative to repo root",
)
reason: str = Field(description="Explanation for why this scope was identified")

model_config = {"arbitrary_types_allowed": True}
Expand Down
140 changes: 140 additions & 0 deletions src/codespy/agents/reviewer/modules/agentic_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
"""Deterministic agentic context detector — finds AI agent prompts, instructions, and configs."""

import logging
from pathlib import Path

from codespy.tools.filesystem.client import FileSystem
from codespy.tools.filesystem.models import EntryType, TreeNode

logger = logging.getLogger(__name__)

# Single files at any depth that indicate agentic contexts (case-insensitive match).
_AGENTIC_SINGLE_FILES: set[str] = {
"claude.md",
"prompt.txt",
"system_prompt.txt",
"instructions.md",
"babyagi.md",
"agent_prompt.md",
"agent_instructions.md",
"task.md",
"memory.md",
"constraints.md",
"ai_settings.json",
"agent_config.yaml",
}

# Folder-based patterns: directory name → set of allowed extensions.
_AGENTIC_FOLDER_PATTERNS: dict[str, set[str]] = {
"prompts": {".md"},
"instructions": {".md"},
"tools": {".md"},
".clinerules": {".md"},
".rules": {".md"},
"config": {".json", ".yaml"},
}


def _matches_single_file(name: str) -> bool:
    """Tell whether ``name`` is one of the known agentic single-file names.

    The comparison ignores case: ``name`` is lowercased before it is looked up
    in ``_AGENTIC_SINGLE_FILES``.
    """
    normalized = name.lower()
    return normalized in _AGENTIC_SINGLE_FILES


def _is_agentic_folder(dir_name: str) -> set[str] | None:
    """Look up the extensions accepted inside a known agentic folder.

    The directory name is lowercased before the lookup, so matching is
    case-insensitive.

    Returns:
        The extension set registered in ``_AGENTIC_FOLDER_PATTERNS`` for this
        directory name, or ``None`` when the name is not registered.
    """
    key = dir_name.lower()
    if key in _AGENTIC_FOLDER_PATTERNS:
        return _AGENTIC_FOLDER_PATTERNS[key]
    return None


def _collect_folder_files(node: TreeNode, prefix: str, allowed_exts: set[str]) -> list[str]:
    """Recursively collect agentic context files from inside an agentic folder.

    A file is collected when its extension (compared case-insensitively) is in
    ``allowed_exts`` OR when its name is one of the known single-file names.
    The second check keeps the "single files match at any depth" rule true
    inside agentic folders as well: e.g. ``prompts/system_prompt.txt`` is
    picked up even though ``prompts`` only lists ``.md``.

    Args:
        node: Directory node whose children are scanned.
        prefix: Text prepended to every child name to build its path; callers
            pass a value ending in ``/``.
        allowed_exts: Lowercase extensions (with leading dot) accepted here.

    Returns:
        Matching file paths in traversal order, each formed from ``prefix``
        followed by the names leading down to the file.
    """
    paths: list[str] = []
    for child in node.children:
        if child.entry_type == EntryType.FILE:
            suffix = Path(child.name).suffix.lower()
            # Extension match for the folder, or a well-known agentic filename.
            if suffix in allowed_exts or _matches_single_file(child.name):
                paths.append(f"{prefix}{child.name}")
        elif child.entry_type == EntryType.DIRECTORY:
            # Subdirectories inherit the enclosing agentic folder's extensions.
            paths.extend(_collect_folder_files(child, f"{prefix}{child.name}/", allowed_exts))
    return paths


def _scan_tree(node: TreeNode, prefix: str = "") -> list[str]:
    """Walk a tree and gather the paths of agentic context files.

    Two kinds of hits are reported:
    - entries that are not directories and whose name is in
      ``_AGENTIC_SINGLE_FILES`` (case-insensitive);
    - files gathered by ``_collect_folder_files`` from directories whose name
      is registered in ``_AGENTIC_FOLDER_PATTERNS``.

    Args:
        node: Tree node whose children are examined.
        prefix: Path prefix of ``node`` relative to the scan root; empty for
            the root itself, otherwise ending in ``/``.

    Returns:
        Paths of all hits, in traversal order.
    """
    found: list[str] = []
    for entry in node.children:
        entry_path = prefix + entry.name

        if entry.entry_type != EntryType.DIRECTORY:
            # Anything that is not a directory is matched purely by its name.
            if _matches_single_file(entry.name):
                found.append(entry_path)
            continue

        extensions = _is_agentic_folder(entry.name)
        if extensions is None:
            # Ordinary directory: keep looking further down.
            found += _scan_tree(entry, f"{entry_path}/")
        else:
            # Agentic folder: hand the whole subtree to the folder collector.
            found += _collect_folder_files(entry, f"{entry_path}/", extensions)
    return found


def detect_agentic_contexts(scope_root: Path, *, max_depth: int = 3) -> list[str]:
    """Detect agentic context files in a scope directory.

    Performs a single tree scan (hidden entries included) to find AI agent
    prompts, instructions, and configuration files. Detection is best-effort:
    if the scope root cannot be opened or the tree cannot be built, the
    failure is logged and an empty list is returned so that this optional
    step never aborts the caller.

    Args:
        scope_root: Absolute path to the scope root directory.
        max_depth: Depth limit passed to the tree scan. Defaults to 3.

    Returns:
        Sorted list of file paths, relative to ``scope_root``, for the
        detected agentic contexts. Empty when nothing is found or the scope
        cannot be scanned.
    """
    try:
        fs = FileSystem(scope_root, create_if_missing=False)
    except Exception:
        logger.debug(f"Cannot access scope root for agentic detection: {scope_root}")
        return []

    try:
        tree = fs.get_tree(max_depth=max_depth, include_hidden=True)
    except Exception as e:  # noqa: BLE001 - detection is optional, never fatal
        logger.warning(f"Could not scan {scope_root} for agentic contexts: {e}")
        return []

    # Sort before logging so the log shows exactly what is returned.
    agentic_files = sorted(_scan_tree(tree))

    if agentic_files:
        logger.info(f"Detected {len(agentic_files)} agentic context(s) in {scope_root}: {agentic_files}")

    return agentic_files


def extract_agentic_content(scope_root: Path, context_paths: list[str]) -> str:
    """Read agentic context files and join them into one text blob.

    Each file that can be read contributes a section made of a
    ``=== <path> ===`` header line followed by the file content; sections are
    separated by a blank line. Files that fail to read are logged at warning
    level and left out.

    Args:
        scope_root: Absolute path to the scope root directory.
        context_paths: Paths of the agentic context files, relative to
            ``scope_root``.

    Returns:
        The joined sections, or an empty string when ``context_paths`` is
        empty, the scope root cannot be opened, or no file could be read.
    """
    if not context_paths:
        return ""

    try:
        fs = FileSystem(scope_root, create_if_missing=False)
    except Exception:
        logger.debug(f"Cannot access scope root for agentic extraction: {scope_root}")
        return ""

    def _render(path: str) -> str | None:
        # One section per file; None signals "skip this file".
        try:
            return f"=== {path} ===\n{fs.read_file(path).content}"
        except Exception as e:  # noqa: BLE001
            logger.warning(f"Could not read agentic context {path}: {e}")
            return None

    sections = (_render(path) for path in context_paths)
    return "\n\n".join(section for section in sections if section is not None)
35 changes: 31 additions & 4 deletions src/codespy/agents/reviewer/modules/code_reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@

from codespy.agents import SignatureContext, get_cost_tracker
from codespy.agents.reviewer.models import Issue, IssueCategory, ScopeResult
from codespy.agents.reviewer.modules.agentic_extractor import extract_agentic_content
from codespy.agents.reviewer.modules.helpers import (
MIN_CONFIDENCE,
make_scope_relative,
resolve_scope_root,
restore_repo_paths,
strip_prefix,
)
from codespy.config import get_settings
from codespy.tools.mcp_utils import cleanup_mcp_contexts, connect_mcp_server
Expand All @@ -37,6 +39,12 @@ class CodeReviewSignature(dspy.Signature):
Review each changed file's patch. For each file, check ALL categories (A, B, C)
before moving to the next file.

AGENTIC CONTEXT: If agentic_context is non-empty, it contains AI agent
instruction/prompt/config files (e.g., claude.md, agent_config.yaml,
.clinerules/*.md) found in this scope. Use this as supporting context for
categories A, B, and C — it reveals the intended agent behavior, constraints,
and tool access patterns that may help you verify or dismiss findings.

TOOLS AVAILABLE:
- find_function_definitions: check function signatures and implementations
- find_function_calls: understand how functions are called, trace data flow
Expand Down Expand Up @@ -130,6 +138,11 @@ class CodeReviewSignature(dspy.Signature):
categories: list[IssueCategory] = dspy.InputField(
desc="Allowed issue categories. Use only these values for the 'category' field on each issue."
)
agentic_context: str = dspy.InputField(
desc="Content of AI agent instruction/prompt/config files found in this scope "
"(e.g., claude.md, agent_config.yaml, .clinerules/*.md). "
"Empty string if none detected. Use as supporting context for all categories."
)

issues: list[Issue] = dspy.OutputField(
desc="Verified issues. Category must be one of the provided categories. "
Expand Down Expand Up @@ -224,14 +237,28 @@ async def aforward(
max_iters=max_iters,
)
scoped = make_scope_relative(scope)
logger.info(
f" Code review: scope {scope.subroot} "
f"({len(scope.changed_files)} files)"
)
# Extract agentic context content for this scope
# agentic_contexts are repo-root-relative; strip subroot prefix for scope-relative paths
scope_relative_contexts = [
strip_prefix(h, scope.subroot) for h in scope.agentic_contexts
]
agentic_ctx = extract_agentic_content(scope_root, scope_relative_contexts)
if agentic_ctx:
logger.info(
f" Code review: scope {scope.subroot} "
f"({len(scope.changed_files)} files, "
f"{len(scope.agentic_contexts)} agentic contexts)"
)
else:
logger.info(
f" Code review: scope {scope.subroot} "
f"({len(scope.changed_files)} files)"
)
async with SignatureContext("code_review", self._cost_tracker):
result = await agent.acall(
scope=scoped,
categories=categories,
agentic_context=agentic_ctx,
)

issues = [
Expand Down
30 changes: 26 additions & 4 deletions src/codespy/agents/reviewer/modules/doc_reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@

from codespy.agents import SignatureContext, get_cost_tracker
from codespy.agents.reviewer.models import Issue, IssueCategory, ScopeResult
from codespy.agents.reviewer.modules.agentic_extractor import extract_agentic_content
from codespy.agents.reviewer.modules.doc_extractor import extract_documentation
from codespy.agents.reviewer.modules.helpers import (
MIN_CONFIDENCE,
make_scope_relative,
resolve_scope_root,
restore_repo_paths,
strip_prefix,
)
from codespy.config import get_settings

Expand All @@ -30,9 +32,12 @@ class DocReviewSignature(dspy.Signature):
You are given:
1. Code patches showing what changed
2. Current documentation content (README, .env.example, docs/, etc.)
3. Optionally, agentic context (AI agent instruction/prompt/config files)

Your job: identify documentation that is now WRONG or MISSING because of the
code changes. Cross-reference the patches against the documentation.
If agentic_context is non-empty, also check whether code changes affect
AI agent behavior and whether agent instructions/prompts need updating.

CHECK FOR:

Expand Down Expand Up @@ -80,6 +85,10 @@ class DocReviewSignature(dspy.Signature):
categories: list[IssueCategory] = dspy.InputField(
desc="Allowed issue categories. Use only these values."
)
agentic_context: str = dspy.InputField(
desc="Content of AI agent instruction/prompt/config files found in this scope. "
"Empty string if none detected. Check if code changes make these stale."
)

issues: list[Issue] = dspy.OutputField(
desc="Documentation issues. Category must be 'documentation'. "
Expand Down Expand Up @@ -156,16 +165,29 @@ async def aforward(
continue
try:
reviewer = dspy.ChainOfThought(DocReviewSignature)
logger.info(
f" Doc review: scope {scope.subroot} "
f"({len(scope.changed_files)} files)"
)
# Extract agentic context content for this scope
scope_relative_contexts = [
strip_prefix(h, scope.subroot) for h in scope.agentic_contexts
]
agentic_ctx = extract_agentic_content(scope_root, scope_relative_contexts)
if agentic_ctx:
logger.info(
f" Doc review: scope {scope.subroot} "
f"({len(scope.changed_files)} files, "
f"{len(scope.agentic_contexts)} agentic contexts)"
)
else:
logger.info(
f" Doc review: scope {scope.subroot} "
f"({len(scope.changed_files)} files)"
)
async with SignatureContext("doc", self._cost_tracker):
result = await asyncio.to_thread(
reviewer,
patches=patches,
documentation=documentation,
categories=[IssueCategory.DOCUMENTATION],
agentic_context=agentic_ctx,
)
issues = [
issue for issue in (result.issues or [])
Expand Down
13 changes: 13 additions & 0 deletions src/codespy/agents/reviewer/modules/scope_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from codespy.agents import SignatureContext, get_cost_tracker
from codespy.agents.reviewer.models import PackageManifest, ScopeResult, ScopeType
from codespy.agents.reviewer.modules.agentic_extractor import detect_agentic_contexts
from codespy.config import get_settings
from codespy.tools.git.models import ChangedFile, MergeRequest, should_review_file
from codespy.tools.mcp_utils import cleanup_mcp_contexts, connect_mcp_server
Expand Down Expand Up @@ -280,6 +281,18 @@ async def aforward(self, mr: MergeRequest, repo_path: Path, is_local: bool = Fal
)]
finally:
await cleanup_mcp_contexts(contexts)
# Detect agentic contexts in each scope
for scope in scopes:
scope_root = repo_path if scope.subroot == "." else repo_path / scope.subroot
agentic_files = detect_agentic_contexts(scope_root)
if agentic_files:
# Store paths relative to repo root (prefix with subroot)
if scope.subroot == ".":
scope.agentic_contexts = agentic_files
else:
scope.agentic_contexts = [f"{scope.subroot}/{h}" for h in agentic_files]
logger.info(f"Found {len(scope.agentic_contexts)} agentic context(s) for scope '{scope.subroot}': {scope.agentic_contexts}")

# Log results
total_files = sum(len(s.changed_files) for s in scopes)
logger.info(f"Identified {len(scopes)} scopes covering {total_files} files")
Expand Down
2 changes: 2 additions & 0 deletions src/codespy/agents/reviewer/reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ def forward(self, config: ReviewConfig) -> ReviewResult:
logger.info(f" Lock file: {manifest.lock_file_path}")
if manifest.dependencies_changed:
logger.info(f" Dependencies changed: Yes")
if scope.agentic_contexts:
logger.info(f" agentic contexts: {scope.agentic_contexts}")

# Run review modules concurrently via asyncio.gather
module_names = ["code_reviewer", "doc_reviewer", "supply_chain_auditor"]
Expand Down