From 72a3915e8158ddace778b1eac03b11612954d21a Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Mon, 25 May 2026 15:45:55 +1000 Subject: [PATCH 1/2] refactor: consolidate VALID_CATEGORIES; delete dead prompt_loader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses §4.1 + §4.2 of TECHNICAL-REVIEW.md. Before: the 8-category list was defined in four places that could drift silently — `cli.ALL_CATEGORIES` (list), `GitHubHandler.VALID_CATEGORIES` (set), `PromptLoader.VALID_CATEGORIES` (list), and the keys of `reviewer.RULE_EVALUATION_ORDER` (dict). Plus a duplicate hardcoded list inside `review_lecture_smart`. Test coverage only verified that two of those four matched. After: one tuple in the new `style_checker/categories.py` module that every other call site imports. RULE_EVALUATION_ORDER still owns the per-category rule ordering, but a new test asserts its keys are exactly `VALID_CATEGORIES` in the same order — drift fails CI loudly. prompt_loader.py was dead production code: only tests imported the PromptLoader class. The runtime (`reviewer.create_single_rule_prompt`) loads prompts directly. Deleting the module + its test removes the fourth source-of-truth for the category list and ~230 lines of unmaintained code, with no behavior change. Files touched: + style_checker/categories.py (new, 22 lines — the single source) - style_checker/prompt_loader.py (deleted, was ~230 lines, unused at runtime) - tests/test_prompt_loader.py (deleted, tested the deleted class) M style_checker/cli.py (import VALID_CATEGORIES; drop ALL_CATEGORIES alias) M style_checker/github_handler.py (drop class-attr; import VALID_CATEGORIES) M style_checker/reviewer.py (drop hardcoded list in review_lecture_smart) M tests/test_cli.py (import VALID_CATEGORIES from canonical location) M tests/test_parsing.py (drop fixture's class-attr setup) M tests/test_reviewer.py (import from categories; add drift-detection test) Test suite: 85 passed (was 91; net -6 from prompt_loader test deletion + 1 new RULE_EVALUATION_ORDER consistency test). --- style_checker/categories.py | 21 +++ style_checker/cli.py | 14 +- style_checker/github_handler.py | 12 +- style_checker/prompt_loader.py | 231 -------------------------------- style_checker/reviewer.py | 18 +-- tests/test_cli.py | 9 +- tests/test_parsing.py | 11 +- tests/test_prompt_loader.py | 77 ----------- tests/test_reviewer.py | 33 +++-- 9 files changed, 66 insertions(+), 360 deletions(-) create mode 100644 style_checker/categories.py delete mode 100644 style_checker/prompt_loader.py delete mode 100644 tests/test_prompt_loader.py diff --git a/style_checker/categories.py b/style_checker/categories.py new file mode 100644 index 0000000..dceefab --- /dev/null +++ b/style_checker/categories.py @@ -0,0 +1,21 @@ +""" +Single source of truth for the style-rule category names. + +Every category here must have a corresponding `{name}-rules.md` file in +`style_checker/rules/` and a matching entry in +`reviewer.RULE_EVALUATION_ORDER`. The consistency between this tuple and +`RULE_EVALUATION_ORDER.keys()` is enforced by a test in `tests/test_reviewer.py`. +""" + +# Ordered — index is the default category processing order used by +# `StyleReviewer.review_lecture_smart`. +VALID_CATEGORIES = ( + "writing", + "math", + "code", + "jax", + "figures", + "references", + "links", + "admonitions", +) diff --git a/style_checker/cli.py b/style_checker/cli.py index 8ccb4a7..b6c07f0 100644 --- a/style_checker/cli.py +++ b/style_checker/cli.py @@ -22,16 +22,10 @@ from datetime import datetime from style_checker import __version__ +from style_checker.categories import VALID_CATEGORIES from style_checker.reviewer import StyleReviewer -# All available categories (matches reviewer.RULE_EVALUATION_ORDER keys) -ALL_CATEGORIES = [ - "writing", "math", "code", "jax", - "figures", "references", "links", "admonitions", -] - - def display_width(s: str) -> int: """Calculate terminal display width, accounting for wide/emoji characters.""" w = 0 @@ -278,13 +272,13 @@ def main(): # Parse categories if args.categories: categories = [c.strip() for c in args.categories.split(",")] - invalid = [c for c in categories if c not in ALL_CATEGORIES] + invalid = [c for c in categories if c not in VALID_CATEGORIES] if invalid: print(f"Error: invalid categories: {', '.join(invalid)}", file=sys.stderr) - print(f"Valid categories: {', '.join(ALL_CATEGORIES)}", file=sys.stderr) + print(f"Valid categories: {', '.join(VALID_CATEGORIES)}", file=sys.stderr) sys.exit(1) else: - categories = list(ALL_CATEGORIES) + categories = list(VALID_CATEGORIES) # API key api_key = args.api_key or os.environ.get("ANTHROPIC_API_KEY") diff --git a/style_checker/github_handler.py b/style_checker/github_handler.py index 3281c42..47af966 100644 --- a/style_checker/github_handler.py +++ b/style_checker/github_handler.py @@ -11,16 +11,12 @@ from github import Github, GithubException from datetime import datetime +from .categories import VALID_CATEGORIES + class GitHubHandler: """Handles GitHub API interactions for PR and issue management""" - # Valid category names (must match files in style_checker/rules/) - VALID_CATEGORIES = { - 'writing', 'math', 'code', 'jax', - 'figures', 'references', 'links', 'admonitions' - } - def __init__(self, token: str, repository: str): """ Initialize GitHub handler @@ -79,10 +75,10 @@ def extract_lecture_from_comment(self, comment_body: str) -> Optional[Tuple[str, # Validate categories if categories != ['all']: - invalid = [c for c in categories if c not in self.VALID_CATEGORIES] + invalid = [c for c in categories if c not in VALID_CATEGORIES] if invalid: print(f"⚠️ Invalid categories: {', '.join(invalid)}") - print(f" Valid categories: {', '.join(sorted(self.VALID_CATEGORIES))}") + print(f" Valid categories: {', '.join(sorted(VALID_CATEGORIES))}") return None else: categories = ['all'] # Default to all categories diff --git a/style_checker/prompt_loader.py b/style_checker/prompt_loader.py deleted file mode 100644 index 0414abf..0000000 --- a/style_checker/prompt_loader.py +++ /dev/null @@ -1,231 +0,0 @@ -""" -Simple utility to load markdown prompt templates by category. - -Loads concise instruction prompts from prompts/ and detailed rules from rules/, -then combines them for the LLM. -""" - -from pathlib import Path -import re -from typing import List, Optional - - -class PromptLoader: - """Loads category-specific style guide prompts and rules from markdown files.""" - - VALID_CATEGORIES = [ - "writing", "math", "code", "jax", - "figures", "references", "links", "admonitions" - ] - - def __init__(self, prompts_dir: Optional[Path] = None, rules_dir: Optional[Path] = None): - """ - Initialize the prompt loader. - - Args: - prompts_dir: Directory containing prompt markdown files. - Defaults to style_checker/prompts/ - rules_dir: Directory containing rule markdown files. - Defaults to style_checker/rules/ - """ - if prompts_dir is None: - prompts_dir = Path(__file__).parent / "prompts" - if rules_dir is None: - rules_dir = Path(__file__).parent / "rules" - - self.prompts_dir = Path(prompts_dir) - self.rules_dir = Path(rules_dir) - - if not self.prompts_dir.exists(): - raise FileNotFoundError( - f"Prompts directory not found: {self.prompts_dir}" - ) - if not self.rules_dir.exists(): - raise FileNotFoundError( - f"Rules directory not found: {self.rules_dir}" - ) - - def load_prompt(self, categories: List[str], lecture_content: str) -> str: - """ - Load and combine prompts + rules for specified categories. - - Approach: - 1. Load concise instruction prompt - 2. Load detailed rules - 3. Combine: prompt + rules + lecture_content - - Args: - categories: List of category names (e.g., ["writing", "math"]) - lecture_content: The lecture markdown content to review - - Returns: - Complete prompt with instructions, rules, and lecture content - - Raises: - ValueError: If invalid category specified - FileNotFoundError: If prompt or rules file not found - """ - # Validate categories - invalid = [c for c in categories if c not in self.VALID_CATEGORIES] - if invalid: - raise ValueError( - f"Invalid categories: {invalid}. " - f"Valid options: {self.VALID_CATEGORIES}" - ) - - # For single category, use simple combination - if len(categories) == 1: - return self._load_single_category(categories[0], lecture_content) - - # For multiple categories, combine them - return self._combine_categories(categories, lecture_content) - - def _load_single_category(self, category: str, lecture_content: str) -> str: - """ - Load prompt and rules for a single category. - - - prompt file: category-prompt.md - - rules file: category-rules.md - """ - # Load prompt (instruction template) - prompt_file = self.prompts_dir / f"{category}-prompt.md" - if not prompt_file.exists(): - raise FileNotFoundError(f"Prompt file not found: {prompt_file}") - - with open(prompt_file, 'r') as f: - prompt = f.read() - - # Debug: Check prompt version - if category == "writing": - # Check for version comment - version_match = re.search(r' - -You are a style checker for QuantEcon lecture files written in MyST Markdown. - -## Task - -Find all violations of the provided rule in the lecture document. - -First, silently analyze the entire document and identify candidate violations. -Then, verify each candidate — confirm the current text actually violates the rule and the fix changes the text. -Only include confirmed violations in your response. Report 0 if none exist. - -## Response Format - -```markdown -# Review Results - -## Summary -[1-2 sentence summary] - -## Issues Found -[NUMBER ONLY] - -## Violations - -### Violation 1: [rule-code] - [Rule Title] -**Severity:** error -**Location:** Line [X] / Section "[name]" -**Description:** [Why this violates the rule] -**Current text:** -~~~markdown -[exact quote] -~~~ -**Suggested fix:** -~~~markdown -[corrected version — MUST be different from current text] -~~~ -**Explanation:** [Why this fix resolves the violation] -``` - -If Issues Found is 0, do not include a Violations section. diff --git a/style_checker/prompts/figures-prompt.md b/style_checker/prompts/figures-prompt.md deleted file mode 100644 index 2508759..0000000 --- a/style_checker/prompts/figures-prompt.md +++ /dev/null @@ -1,41 +0,0 @@ - - -You are a style checker for QuantEcon lecture files written in MyST Markdown. - -## Task - -Find all violations of the provided rule in the lecture document. - -First, silently analyze the entire document and identify candidate violations. -Then, verify each candidate — confirm the current text actually violates the rule and the fix changes the text. -Only include confirmed violations in your response. Report 0 if none exist. - -## Response Format - -```markdown -# Review Results - -## Summary -[1-2 sentence summary] - -## Issues Found -[NUMBER ONLY] - -## Violations - -### Violation 1: [rule-code] - [Rule Title] -**Severity:** error -**Location:** Line [X] / Section "[name]" -**Description:** [Why this violates the rule] -**Current text:** -~~~markdown -[exact quote] -~~~ -**Suggested fix:** -~~~markdown -[corrected version — MUST be different from current text] -~~~ -**Explanation:** [Why this fix resolves the violation] -``` - -If Issues Found is 0, do not include a Violations section. diff --git a/style_checker/prompts/jax-prompt.md b/style_checker/prompts/jax-prompt.md deleted file mode 100644 index 2508759..0000000 --- a/style_checker/prompts/jax-prompt.md +++ /dev/null @@ -1,41 +0,0 @@ - - -You are a style checker for QuantEcon lecture files written in MyST Markdown. - -## Task - -Find all violations of the provided rule in the lecture document. - -First, silently analyze the entire document and identify candidate violations. -Then, verify each candidate — confirm the current text actually violates the rule and the fix changes the text. -Only include confirmed violations in your response. Report 0 if none exist. - -## Response Format - -```markdown -# Review Results - -## Summary -[1-2 sentence summary] - -## Issues Found -[NUMBER ONLY] - -## Violations - -### Violation 1: [rule-code] - [Rule Title] -**Severity:** error -**Location:** Line [X] / Section "[name]" -**Description:** [Why this violates the rule] -**Current text:** -~~~markdown -[exact quote] -~~~ -**Suggested fix:** -~~~markdown -[corrected version — MUST be different from current text] -~~~ -**Explanation:** [Why this fix resolves the violation] -``` - -If Issues Found is 0, do not include a Violations section. diff --git a/style_checker/prompts/links-prompt.md b/style_checker/prompts/links-prompt.md deleted file mode 100644 index 2508759..0000000 --- a/style_checker/prompts/links-prompt.md +++ /dev/null @@ -1,41 +0,0 @@ - - -You are a style checker for QuantEcon lecture files written in MyST Markdown. - -## Task - -Find all violations of the provided rule in the lecture document. - -First, silently analyze the entire document and identify candidate violations. -Then, verify each candidate — confirm the current text actually violates the rule and the fix changes the text. -Only include confirmed violations in your response. Report 0 if none exist. - -## Response Format - -```markdown -# Review Results - -## Summary -[1-2 sentence summary] - -## Issues Found -[NUMBER ONLY] - -## Violations - -### Violation 1: [rule-code] - [Rule Title] -**Severity:** error -**Location:** Line [X] / Section "[name]" -**Description:** [Why this violates the rule] -**Current text:** -~~~markdown -[exact quote] -~~~ -**Suggested fix:** -~~~markdown -[corrected version — MUST be different from current text] -~~~ -**Explanation:** [Why this fix resolves the violation] -``` - -If Issues Found is 0, do not include a Violations section. diff --git a/style_checker/prompts/math-prompt.md b/style_checker/prompts/math-prompt.md deleted file mode 100644 index 2508759..0000000 --- a/style_checker/prompts/math-prompt.md +++ /dev/null @@ -1,41 +0,0 @@ - - -You are a style checker for QuantEcon lecture files written in MyST Markdown. - -## Task - -Find all violations of the provided rule in the lecture document. - -First, silently analyze the entire document and identify candidate violations. -Then, verify each candidate — confirm the current text actually violates the rule and the fix changes the text. -Only include confirmed violations in your response. Report 0 if none exist. - -## Response Format - -```markdown -# Review Results - -## Summary -[1-2 sentence summary] - -## Issues Found -[NUMBER ONLY] - -## Violations - -### Violation 1: [rule-code] - [Rule Title] -**Severity:** error -**Location:** Line [X] / Section "[name]" -**Description:** [Why this violates the rule] -**Current text:** -~~~markdown -[exact quote] -~~~ -**Suggested fix:** -~~~markdown -[corrected version — MUST be different from current text] -~~~ -**Explanation:** [Why this fix resolves the violation] -``` - -If Issues Found is 0, do not include a Violations section. diff --git a/style_checker/prompts/admonitions-prompt.md b/style_checker/prompts/prompt.md similarity index 100% rename from style_checker/prompts/admonitions-prompt.md rename to style_checker/prompts/prompt.md diff --git a/style_checker/prompts/references-prompt.md b/style_checker/prompts/references-prompt.md deleted file mode 100644 index 2508759..0000000 --- a/style_checker/prompts/references-prompt.md +++ /dev/null @@ -1,41 +0,0 @@ - - -You are a style checker for QuantEcon lecture files written in MyST Markdown. - -## Task - -Find all violations of the provided rule in the lecture document. - -First, silently analyze the entire document and identify candidate violations. -Then, verify each candidate — confirm the current text actually violates the rule and the fix changes the text. -Only include confirmed violations in your response. Report 0 if none exist. - -## Response Format - -```markdown -# Review Results - -## Summary -[1-2 sentence summary] - -## Issues Found -[NUMBER ONLY] - -## Violations - -### Violation 1: [rule-code] - [Rule Title] -**Severity:** error -**Location:** Line [X] / Section "[name]" -**Description:** [Why this violates the rule] -**Current text:** -~~~markdown -[exact quote] -~~~ -**Suggested fix:** -~~~markdown -[corrected version — MUST be different from current text] -~~~ -**Explanation:** [Why this fix resolves the violation] -``` - -If Issues Found is 0, do not include a Violations section. diff --git a/style_checker/prompts/writing-prompt.md b/style_checker/prompts/writing-prompt.md deleted file mode 100644 index 2508759..0000000 --- a/style_checker/prompts/writing-prompt.md +++ /dev/null @@ -1,41 +0,0 @@ - - -You are a style checker for QuantEcon lecture files written in MyST Markdown. - -## Task - -Find all violations of the provided rule in the lecture document. - -First, silently analyze the entire document and identify candidate violations. -Then, verify each candidate — confirm the current text actually violates the rule and the fix changes the text. -Only include confirmed violations in your response. Report 0 if none exist. - -## Response Format - -```markdown -# Review Results - -## Summary -[1-2 sentence summary] - -## Issues Found -[NUMBER ONLY] - -## Violations - -### Violation 1: [rule-code] - [Rule Title] -**Severity:** error -**Location:** Line [X] / Section "[name]" -**Description:** [Why this violates the rule] -**Current text:** -~~~markdown -[exact quote] -~~~ -**Suggested fix:** -~~~markdown -[corrected version — MUST be different from current text] -~~~ -**Explanation:** [Why this fix resolves the violation] -``` - -If Issues Found is 0, do not include a Violations section. diff --git a/style_checker/reviewer.py b/style_checker/reviewer.py index 6a41874..c0886fb 100644 --- a/style_checker/reviewer.py +++ b/style_checker/reviewer.py @@ -154,25 +154,28 @@ def extract_individual_rules(category: str) -> List[Dict[str, str]]: def create_single_rule_prompt(category: str, rule: Dict[str, str], lecture_content: str) -> str: """ Create a focused prompt for checking a single rule. - + + The base prompt is rule-agnostic — a single `prompts/prompt.md` is shared + across all categories. The `category` arg is currently unused but kept in + the signature so callers don't need to change if a category-specific + prompt is ever reintroduced. + Args: - category: Category name (e.g., 'writing') + category: Category name (e.g., 'writing') — currently unused rule: Dict with 'rule_id', 'title', and 'content' lecture_content: The lecture to check - + Returns: Complete prompt focused on one specific rule """ - # Load the base prompt (without rules) prompts_dir = Path(__file__).parent / "prompts" - prompt_file = prompts_dir / f"{category}-prompt.md" - + prompt_file = prompts_dir / "prompt.md" + if not prompt_file.exists(): raise FileNotFoundError(f"Prompt file not found: {prompt_file}") - - with open(prompt_file, 'r') as f: - base_prompt = f.read() - + + base_prompt = prompt_file.read_text() + # Create focused prompt with single rule focused_prompt = f"""{base_prompt} @@ -186,7 +189,7 @@ def create_single_rule_prompt(category: str, rule: Dict[str, str], lecture_conte {lecture_content} """ - + return focused_prompt def parse_markdown_response(response: str) -> Dict[str, Any]: diff --git a/tests/README.md b/tests/README.md index 1e06f48..9d72d0a 100644 --- a/tests/README.md +++ b/tests/README.md @@ -33,21 +33,14 @@ Tests the fix application engine: - First-occurrence-only replacement - Fix quality validation warnings -### `test_prompt_loader.py` -Tests prompt and rules loading: -- Single and multi-category prompt loading -- All 8 categories loadable -- Invalid category error handling -- Prompt version tracking presence -- Rules file content validation - ### `test_reviewer.py` -Tests rule extraction and evaluation order: +Tests rule extraction, evaluation order, and prompt-file invariants: - Rule counts per category (49 total) - Rule type distribution (32 rule, 13 style, 4 migrate) -- RULE_EVALUATION_ORDER consistency with rule files +- RULE_EVALUATION_ORDER consistency with rule files AND with VALID_CATEGORIES (drift detection) - Rule field validation and ID format - No duplicate rule IDs +- Shared `prompts/prompt.md` exists and carries a version header ### `test_llm_integration.py` **Integration tests** that make real LLM API calls (marked with `@pytest.mark.integration`): @@ -107,7 +100,8 @@ pytest -m integration -v Install test dependencies: ```bash -pip install -r requirements.txt +uv sync --all-extras # recommended (uses uv.lock) +# or: pip install -e ".[dev,action]" ``` This includes: diff --git a/tests/test_reviewer.py b/tests/test_reviewer.py index 708e86d..9dd7fda 100644 --- a/tests/test_reviewer.py +++ b/tests/test_reviewer.py @@ -2,6 +2,9 @@ Tests for reviewer.py — extract_individual_rules() and RULE_EVALUATION_ORDER """ +from pathlib import Path + +import style_checker from style_checker.categories import VALID_CATEGORIES from style_checker.reviewer import ( extract_individual_rules, @@ -9,6 +12,9 @@ ) +PROMPTS_DIR = Path(style_checker.__file__).parent / "prompts" + + # Expected rule counts per category (from rule files) EXPECTED_RULE_COUNTS = { 'writing': 8, @@ -134,6 +140,29 @@ def test_no_extra_ids_in_order(self): f"'{category}': '{rule_id}' in order but not in rule file" +class TestPromptFile: + """Test the single shared prompt file used by create_single_rule_prompt. + + Replaces coverage from the deleted test_prompt_loader.py — but for a + single consolidated prompt instead of one file per category. + """ + + def test_prompt_file_exists(self): + """create_single_rule_prompt() raises FileNotFoundError at runtime if + the prompt file is missing — assert at test time so we catch packaging + regressions before they hit production.""" + prompt_file = PROMPTS_DIR / "prompt.md" + assert prompt_file.exists(), \ + f"Shared prompt file missing: {prompt_file}" + + def test_prompt_file_has_version_header(self): + """The prompt should carry a `` comment + so logs can record which prompt produced a given review.""" + content = (PROMPTS_DIR / "prompt.md").read_text() + assert "Prompt Version:" in content, \ + "prompt.md missing the 'Prompt Version: ...' header comment" + + class TestRuleTypeCounts: """Test that rule type distribution matches expectations"""