From a26198e568ed4da9ec601a806020884cdaf0e143 Mon Sep 17 00:00:00 2001 From: Matthew Tibbits Date: Sun, 15 Mar 2026 13:24:44 +0000 Subject: [PATCH 1/6] test: add 23 unit tests for rate-limit artifact cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Covers _cleanup_rate_limit_artifacts() and _do_cleanup_rate_limit_artifacts() from PR #32. Uses tmp_path-based fake ~/.claude/ directories with Path.home() patched — no real filesystem state touched. Tests (CLN-001 through CLN-023): - Deletion of all four artifact types (JSONL, todo, debug, telemetry) - Preservation of old, large, non-empty, and uncorrelated files - Cleanup only triggered on rate-limit results (not success/failure) - Missing directories, per-file OSError isolation, top-level exception safety - Resolved working directory stashing, YAML non-persistence, fallback Co-Authored-By: Claude Opus 4.6 Signed-off-by: Matthew Tibbits --- tests/test_cleanup.py | 597 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 597 insertions(+) create mode 100644 tests/test_cleanup.py diff --git a/tests/test_cleanup.py b/tests/test_cleanup.py new file mode 100644 index 0000000..6c02c26 --- /dev/null +++ b/tests/test_cleanup.py @@ -0,0 +1,597 @@ +""" +Tests for rate-limit artifact cleanup in queue_manager.py. + +Covers _cleanup_rate_limit_artifacts() and _do_cleanup_rate_limit_artifacts(). +Uses tmp_path-based fake ~/.claude/ directories to avoid touching real state. 
+""" + +import os +import time +from datetime import datetime, timedelta +from pathlib import Path +from unittest.mock import patch + +import pytest + +from claude_code_queue.models import ( + ExecutionResult, + PromptStatus, + QueuedPrompt, + QueueState, + RateLimitInfo, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +SESSION_UUID = "00134021-1e30-4928-b9af-e92a676ab248" +OTHER_UUID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" +FAKE_WORKING_DIR = "/home/testuser/project" + + +def _make_claude_dirs(tmp_path, working_dir=FAKE_WORKING_DIR): + """Create the four artifact directories under a fake ~/.claude/. + + Returns (claude_dir, jsonl_dir, todos_dir, debug_dir, telemetry_dir). + """ + claude_dir = tmp_path / ".claude" + encoded = working_dir.replace("/", "-") + jsonl_dir = claude_dir / "projects" / encoded + todos_dir = claude_dir / "todos" + debug_dir = claude_dir / "debug" + telemetry_dir = claude_dir / "telemetry" + for d in (jsonl_dir, todos_dir, debug_dir, telemetry_dir): + d.mkdir(parents=True) + return claude_dir, jsonl_dir, todos_dir, debug_dir, telemetry_dir + + +def _write_file(path, size_bytes=100, content=None): + """Write a file with a given size or explicit content.""" + if content is not None: + path.write_text(content) + else: + path.write_bytes(b"x" * size_bytes) + + +def _make_prompt(working_dir=FAKE_WORKING_DIR): + """Create a prompt with last_executed set to 1 second ago and resolved working dir. + + Using a 1-second offset avoids filesystem mtime-precision races: ext4 has + 1-second granularity, so a file written "now" may have an mtime equal to + or slightly before datetime.now().timestamp(). 
+ """ + p = QueuedPrompt( + id="abc12345", + content="test task", + working_directory=working_dir, + status=PromptStatus.EXECUTING, + ) + p.last_executed = datetime.now() - timedelta(seconds=1) + p._resolved_working_directory = str(Path(working_dir).resolve()) + return p + + +def _rate_limit_result() -> ExecutionResult: + return ExecutionResult( + success=False, + output="usage limit reached", + error="", + rate_limit_info=RateLimitInfo(is_rate_limited=True, reset_time=None), + execution_time=0.1, + ) + + +# =========================================================================== +# Basic Cleanup — All Four Artifact Types (CLN-001 through CLN-004) +# =========================================================================== + + +def test_cleanup_deletes_rate_limited_jsonl(tmp_path, manager): # CLN-001 + """A small, recent JSONL file is deleted; its UUID is used for correlated cleanup.""" + claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path) + prompt = _make_prompt() + + jsonl_file = jsonl_dir / f"{SESSION_UUID}.jsonl" + _write_file(jsonl_file, size_bytes=4000) # 4 KB — rate-limited size + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert not jsonl_file.exists(), "Rate-limited JSONL file should be deleted" + assert "Cleaned up" in prompt.execution_log + + +def test_cleanup_deletes_correlated_todo_stub(tmp_path, manager): # CLN-002 + """A 2-byte todo stub whose UUID matches the JSONL file is deleted.""" + claude_dir, jsonl_dir, todos_dir, *_ = _make_claude_dirs(tmp_path) + prompt = _make_prompt() + + _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000) + todo_file = todos_dir / f"{SESSION_UUID}-agent-{SESSION_UUID}.json" + _write_file(todo_file, content="[]") + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + 
manager._cleanup_rate_limit_artifacts(prompt) + + assert not todo_file.exists(), "2-byte todo stub should be deleted" + + +def test_cleanup_deletes_correlated_debug_file(tmp_path, manager): # CLN-003 + """A debug file whose UUID matches the JSONL file is deleted.""" + claude_dir, jsonl_dir, todos_dir, debug_dir, _ = _make_claude_dirs(tmp_path) + prompt = _make_prompt() + + _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000) + debug_file = debug_dir / f"{SESSION_UUID}.txt" + _write_file(debug_file, size_bytes=13000) + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert not debug_file.exists(), "Correlated debug file should be deleted" + + +def test_cleanup_deletes_correlated_telemetry_file(tmp_path, manager): # CLN-004 + """A telemetry file whose session UUID matches the JSONL file is deleted.""" + claude_dir, jsonl_dir, _, _, telemetry_dir = _make_claude_dirs(tmp_path) + prompt = _make_prompt() + + _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000) + telemetry_file = telemetry_dir / f"1p_failed_events.{SESSION_UUID}.{OTHER_UUID}.json" + _write_file(telemetry_file, size_bytes=30000) + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert not telemetry_file.exists(), "Correlated telemetry file should be deleted" + + +# =========================================================================== +# Preservation — Files That Must NOT Be Deleted (CLN-005 through CLN-009) +# =========================================================================== + + +def test_cleanup_preserves_old_jsonl(tmp_path, manager): # CLN-005 + """JSONL files older than last_executed are not deleted.""" + claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path) + + old_jsonl = jsonl_dir / f"{SESSION_UUID}.jsonl" 
+ _write_file(old_jsonl, size_bytes=4000) + # Set mtime to 1 hour ago + old_time = time.time() - 3600 + os.utime(old_jsonl, (old_time, old_time)) + + prompt = _make_prompt() + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert old_jsonl.exists(), "Old JSONL file must be preserved" + + +def test_cleanup_preserves_large_jsonl(tmp_path, manager): # CLN-006 + """JSONL files >= 10 KB (successful runs) are not deleted even if recent.""" + claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path) + prompt = _make_prompt() + + large_jsonl = jsonl_dir / f"{SESSION_UUID}.jsonl" + _write_file(large_jsonl, size_bytes=150_000) # 150 KB — successful run + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert large_jsonl.exists(), "Large JSONL file (successful run) must be preserved" + + +def test_cleanup_preserves_non_empty_todo(tmp_path, manager): # CLN-007 + """Todo files > 2 bytes are not deleted even when UUID-correlated.""" + claude_dir, jsonl_dir, todos_dir, *_ = _make_claude_dirs(tmp_path) + prompt = _make_prompt() + + _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000) + real_todo = todos_dir / f"{SESSION_UUID}-agent-{SESSION_UUID}.json" + _write_file(real_todo, size_bytes=800) # legitimate todo + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert real_todo.exists(), "Non-empty todo file must be preserved" + + +def test_cleanup_preserves_old_debug_file(tmp_path, manager): # CLN-008 + """Debug files older than last_executed are not deleted even with UUID match.""" + claude_dir, jsonl_dir, _, debug_dir, _ = _make_claude_dirs(tmp_path) + + # Create JSONL file with current 
timestamp + _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000) + + # Create debug file with old timestamp + debug_file = debug_dir / f"{SESSION_UUID}.txt" + _write_file(debug_file, size_bytes=13000) + old_time = time.time() - 3600 + os.utime(debug_file, (old_time, old_time)) + + prompt = _make_prompt() + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert debug_file.exists(), "Old debug file must be preserved (timestamp guard)" + + +def test_cleanup_does_not_delete_debug_without_jsonl_match(tmp_path, manager): # CLN-009 + """Debug files are only deleted when their UUID matches a rate-limited JSONL file. + + If no JSONL file matches (e.g. it's >= 10 KB), the debug file is untouched. + """ + claude_dir, jsonl_dir, _, debug_dir, _ = _make_claude_dirs(tmp_path) + prompt = _make_prompt() + + # JSONL file is large (successful run) — no UUID collected + _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=150_000) + + # Debug file exists for same UUID — must NOT be deleted + debug_file = debug_dir / f"{SESSION_UUID}.txt" + _write_file(debug_file, size_bytes=13000) + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert debug_file.exists(), "Debug file must not be deleted without JSONL UUID match" + + +# =========================================================================== +# Cleanup Triggered Only on Rate Limit (CLN-010 through CLN-012) +# =========================================================================== + + +def test_cleanup_not_called_on_success(tmp_path, manager, mocker): # CLN-010 + """Successful execution does not trigger artifact cleanup.""" + prompt = QueuedPrompt(content="task") + manager.state = manager.storage.load_queue_state() + manager.state.add_prompt(prompt) + + 
spy = mocker.patch.object(manager, "_cleanup_rate_limit_artifacts") + + success = ExecutionResult(success=True, output="done", error="", execution_time=0.1) + mocker.patch.object(manager.claude_interface, "execute_prompt", return_value=success) + manager._execute_prompt(prompt) + + spy.assert_not_called() + + +def test_cleanup_not_called_on_generic_failure(tmp_path, manager, mocker): # CLN-011 + """Generic failure does not trigger artifact cleanup.""" + prompt = QueuedPrompt(content="task", max_retries=3) + manager.state = manager.storage.load_queue_state() + manager.state.add_prompt(prompt) + + spy = mocker.patch.object(manager, "_cleanup_rate_limit_artifacts") + + fail = ExecutionResult(success=False, output="", error="oops", execution_time=0.1) + mocker.patch.object(manager.claude_interface, "execute_prompt", return_value=fail) + manager._execute_prompt(prompt) + + spy.assert_not_called() + + +def test_cleanup_called_on_rate_limit(tmp_path, manager, mocker): # CLN-012 + """Rate-limited execution triggers artifact cleanup.""" + prompt = QueuedPrompt(content="task", max_retries=3) + manager.state = manager.storage.load_queue_state() + manager.state.add_prompt(prompt) + + spy = mocker.patch.object(manager, "_cleanup_rate_limit_artifacts") + + mocker.patch.object( + manager.claude_interface, "execute_prompt", return_value=_rate_limit_result() + ) + manager._execute_prompt(prompt) + + spy.assert_called_once_with(prompt) + + +# =========================================================================== +# Missing Directories (CLN-013) +# =========================================================================== + + +def test_cleanup_handles_missing_directories(tmp_path, manager): # CLN-013 + """Cleanup does not crash when artifact directories don't exist.""" + # Point home at tmp_path which has no .claude/ at all + prompt = _make_prompt() + prompt.last_executed = datetime.now() + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() 
+ manager.state.add_prompt(prompt) + # Should not raise + manager._cleanup_rate_limit_artifacts(prompt) + + # No log entry since nothing was deleted + assert "Cleaned up" not in prompt.execution_log + + +# =========================================================================== +# Per-File Exception Handling (CLN-014) +# =========================================================================== + + +def test_cleanup_continues_after_oserror_on_one_file(tmp_path, manager, mocker): # CLN-014 + """If stat() raises OSError on the debug file, the todo file is still deleted.""" + _, jsonl_dir, todos_dir, debug_dir, _ = _make_claude_dirs(tmp_path) + prompt = _make_prompt() + + # Create one small JSONL + _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000) + + # Create debug file normally + debug_file = debug_dir / f"{SESSION_UUID}.txt" + _write_file(debug_file, size_bytes=13000) + + # Create todo stub — should still be cleaned up despite debug failure + todo_file = todos_dir / f"{SESSION_UUID}-agent-{SESSION_UUID}.json" + _write_file(todo_file, content="[]") + + # Patch Path.stat to raise OSError only for the debug file + original_stat = Path.stat + + def selective_stat(self, *args, **kwargs): + if str(self) == str(debug_file): + raise OSError("Permission denied") + return original_stat(self, *args, **kwargs) + + mocker.patch.object(Path, "stat", selective_stat) + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + # Stop all mocks before checking file existence (exists() calls stat()) + mocker.stopall() + assert not todo_file.exists(), "Todo file should still be deleted despite debug OSError" + assert debug_file.exists(), "Debug file should survive (stat raised OSError)" + + +# =========================================================================== +# Top-Level Exception Safety (CLN-015) +# 
=========================================================================== + + +def test_cleanup_exception_does_not_break_result_processing(tmp_path, manager, mocker): # CLN-015 + """If the entire cleanup throws, _process_execution_result() still completes + and the prompt's RATE_LIMITED status is persisted. + """ + prompt = QueuedPrompt(content="task", max_retries=3) + manager.state = QueueState() + manager.state.add_prompt(prompt) + + # Make cleanup explode + mocker.patch.object( + manager, "_do_cleanup_rate_limit_artifacts", + side_effect=RuntimeError("disk on fire") + ) + + rl_result = _rate_limit_result() + prompt.status = PromptStatus.EXECUTING + prompt.last_executed = datetime.now() + manager._process_execution_result(prompt, rl_result) + + assert prompt.status == PromptStatus.RATE_LIMITED, ( + "Prompt must reach RATE_LIMITED status even when cleanup throws" + ) + assert manager.state.last_processed is not None, ( + "last_processed must be set even when cleanup throws" + ) + assert "artifact cleanup failed" in prompt.execution_log + + +# =========================================================================== +# No last_executed Guard (CLN-016) +# =========================================================================== + + +def test_cleanup_noop_without_last_executed(manager): # CLN-016 + """Cleanup is a no-op when prompt.last_executed is None.""" + prompt = QueuedPrompt(content="task") + prompt.last_executed = None + + manager.state = QueueState() + manager.state.add_prompt(prompt) + + # Should not raise and should not log + manager._cleanup_rate_limit_artifacts(prompt) + assert "Cleaned up" not in prompt.execution_log + + +# =========================================================================== +# Resolved Working Directory (CLN-017, CLN-018) +# =========================================================================== + + +def test_execute_prompt_stashes_resolved_working_directory(manager, mocker): # CLN-017 + """_execute_prompt() sets 
_resolved_working_directory on the prompt.""" + prompt = QueuedPrompt(content="task", working_directory="/some/path") + manager.state = manager.storage.load_queue_state() + manager.state.add_prompt(prompt) + + mocker.patch.object( + manager.claude_interface, "execute_prompt", + return_value=ExecutionResult(success=True, output="ok", error="", execution_time=0.1), + ) + manager._execute_prompt(prompt) + + assert prompt._resolved_working_directory is not None + assert prompt._resolved_working_directory == str(Path("/some/path").resolve()) + + +def test_cleanup_uses_resolved_working_directory(tmp_path, manager): # CLN-018 + """Cleanup uses _resolved_working_directory (not re-resolving working_directory).""" + # Set up dirs for the resolved path, not the relative one + claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path, working_dir=FAKE_WORKING_DIR) + + prompt = QueuedPrompt( + content="task", + working_directory=".", # relative — would resolve to CWD + status=PromptStatus.EXECUTING, + ) + prompt.last_executed = datetime.now() - timedelta(seconds=1) + prompt._resolved_working_directory = FAKE_WORKING_DIR # stashed at execution time + + jsonl_file = jsonl_dir / f"{SESSION_UUID}.jsonl" + _write_file(jsonl_file, size_bytes=4000) + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert not jsonl_file.exists(), ( + "Cleanup must use _resolved_working_directory, not re-resolve '.'" + ) + + +# =========================================================================== +# Deleted Count and Logging (CLN-019, CLN-020) +# =========================================================================== + + +def test_cleanup_counts_all_deleted_artifacts(tmp_path, manager, capsys): # CLN-019 + """The deleted count includes all four artifact types.""" + claude_dir, jsonl_dir, todos_dir, debug_dir, telemetry_dir = _make_claude_dirs(tmp_path) + prompt = _make_prompt() + 
+ _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000) + _write_file(todos_dir / f"{SESSION_UUID}-agent-{SESSION_UUID}.json", content="[]") + _write_file(debug_dir / f"{SESSION_UUID}.txt", size_bytes=13000) + _write_file(telemetry_dir / f"1p_failed_events.{SESSION_UUID}.{OTHER_UUID}.json", size_bytes=30000) + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert "Cleaned up 4 rate-limit artifact(s)" in prompt.execution_log + captured = capsys.readouterr() + assert "[cleanup] Removed 4 rate-limit artifact(s)" in captured.out + + +def test_cleanup_no_log_when_nothing_deleted(tmp_path, manager, capsys): # CLN-020 + """No log entry or print when zero files are deleted.""" + _make_claude_dirs(tmp_path) + prompt = _make_prompt() + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert "Cleaned up" not in prompt.execution_log + captured = capsys.readouterr() + assert "[cleanup]" not in captured.out + + +# =========================================================================== +# JSONL Early Break (CLN-021) +# =========================================================================== + + +def test_cleanup_breaks_after_first_jsonl_match(tmp_path, manager): # CLN-021 + """Only one JSONL file is deleted per cleanup (one subprocess = one UUID). + + Even with multiple small recent JSONL files, only the first match is deleted. 
+ """ + uuid2 = "99999999-aaaa-bbbb-cccc-dddddddddddd" + claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path) + prompt = _make_prompt() + + f1 = jsonl_dir / f"{SESSION_UUID}.jsonl" + f2 = jsonl_dir / f"{uuid2}.jsonl" + _write_file(f1, size_bytes=4000) + _write_file(f2, size_bytes=4000) + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + # Exactly one should be deleted (we don't know which due to glob ordering) + remaining = list(jsonl_dir.glob("*.jsonl")) + assert len(remaining) == 1, ( + f"Expected exactly 1 JSONL file remaining after cleanup, got {len(remaining)}" + ) + + +# =========================================================================== +# _resolved_working_directory Field (CLN-022) +# =========================================================================== + + +def test_resolved_working_directory_not_persisted_to_yaml(tmp_path, manager): # CLN-022 + """_resolved_working_directory is transient and not written to YAML frontmatter.""" + prompt = QueuedPrompt(content="task", working_directory="/some/path") + prompt._resolved_working_directory = "/some/path" + prompt.last_executed = datetime.now() + + manager.state = manager.storage.load_queue_state() + manager.state.add_prompt(prompt) + manager.storage.save_queue_state(manager.state) + + # Read the file back and check YAML doesn't contain the field + queue_files = list(manager.storage.queue_dir.glob("*.md")) + assert len(queue_files) == 1 + content = queue_files[0].read_text() + assert "_resolved_working_directory" not in content + + # Reload and verify it's None (not persisted) + reloaded = manager.storage.load_queue_state() + reloaded_prompt = reloaded.prompts[0] + assert reloaded_prompt._resolved_working_directory is None + + +# =========================================================================== +# Fallback When _resolved_working_directory Is None (CLN-023) +# 
=========================================================================== + + +def test_cleanup_falls_back_to_resolve_when_stash_missing(tmp_path, manager): # CLN-023 + """If _resolved_working_directory is None, cleanup resolves working_directory directly.""" + claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path, working_dir=FAKE_WORKING_DIR) + prompt = QueuedPrompt( + content="task", + working_directory=FAKE_WORKING_DIR, + status=PromptStatus.EXECUTING, + ) + prompt.last_executed = datetime.now() - timedelta(seconds=1) + prompt._resolved_working_directory = None # simulate missing stash + + jsonl_file = jsonl_dir / f"{SESSION_UUID}.jsonl" + _write_file(jsonl_file, size_bytes=4000) + + with patch("pathlib.Path.home", return_value=tmp_path): + manager.state = QueueState() + manager.state.add_prompt(prompt) + manager._cleanup_rate_limit_artifacts(prompt) + + assert not jsonl_file.exists(), "Cleanup should fall back to resolving working_directory" From 69cb1875e19e672524cb01b83556cacf17828fd5 Mon Sep 17 00:00:00 2001 From: Matthew Tibbits Date: Mon, 16 Mar 2026 02:26:19 +0000 Subject: [PATCH 2/6] feat: display token usage and duration after each job execution After each job completes during `claude-queue start`, print duration and token usage (input + output) extracted from Claude Code's JSONL conversation logs. Detailed cache breakdowns are persisted in the prompt's execution log. Uses the existing JSONL files under ~/.claude/projects/ rather than switching to --output-format json, preserving text-mode stdout, Fix 2 rate-limit detection, and all existing tests unchanged. 
Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Matthew Tibbits --- src/claude_code_queue/models.py | 16 + src/claude_code_queue/queue_manager.py | 115 ++++- tests/test_session_stats.py | 563 +++++++++++++++++++++++++ 3 files changed, 693 insertions(+), 1 deletion(-) create mode 100644 tests/test_session_stats.py diff --git a/src/claude_code_queue/models.py b/src/claude_code_queue/models.py index 2282715..a832229 100644 --- a/src/claude_code_queue/models.py +++ b/src/claude_code_queue/models.py @@ -245,6 +245,22 @@ def get_stats(self) -> Dict[str, Any]: } +@dataclass +class SessionStats: + """Token usage statistics extracted from a session's JSONL log.""" + + input_tokens: int = 0 + output_tokens: int = 0 + cache_creation_input_tokens: int = 0 + cache_read_input_tokens: int = 0 + api_turns: int = 0 + + @property + def total_input_tokens(self) -> int: + """Total tokens billed as input (non-cached + cache-write + cache-read).""" + return self.input_tokens + self.cache_creation_input_tokens + self.cache_read_input_tokens + + @dataclass class ExecutionResult: """Result of executing a prompt.""" diff --git a/src/claude_code_queue/queue_manager.py b/src/claude_code_queue/queue_manager.py index 1a91e55..3f44185 100644 --- a/src/claude_code_queue/queue_manager.py +++ b/src/claude_code_queue/queue_manager.py @@ -2,6 +2,7 @@ Queue manager with execution loop. 
""" +import json import os import sys import time @@ -10,7 +11,7 @@ from pathlib import Path from typing import List, Optional, Callable, Dict, Any -from .models import QueuedPrompt, QueueState, PromptStatus, ExecutionResult +from .models import QueuedPrompt, QueueState, PromptStatus, ExecutionResult, SessionStats from .storage import QueueStorage from .claude_interface import ClaudeCodeInterface @@ -266,15 +267,22 @@ def _process_execution_result( """Process the result of prompt execution.""" execution_summary = f"Execution completed in {result.execution_time:.1f}s" + # Extract token usage from the JSONL conversation log BEFORE any branch + # logic runs. CRITICAL: this must happen before _cleanup_rate_limit_artifacts() + # which deletes the JSONL file on the rate-limited path. + stats = self._extract_session_stats(prompt) + if result.success: # retry_not_before is already None — cleared by _execute_prompt() via clear_retry_backoff(). prompt.status = PromptStatus.COMPLETED prompt.add_log(f"{execution_summary} - SUCCESS") if result.output: prompt.add_log(f"Output:\n{result.output}") + self._log_session_stats(prompt, stats) self.state.total_processed += 1 print(f"✓ Prompt {prompt.id} completed successfully") + print(self._format_stats_line(result.execution_time, stats)) elif result.is_non_retryable: # Fix B — Non-retryable error: fail immediately, skip retry counter and can_retry(). 
@@ -317,10 +325,12 @@ def _process_execution_result( else "" ) prompt.add_log(f"Message{source_tag}: {result.rate_limit_info.limit_message}") + self._log_session_stats(prompt, stats) if not was_already_rate_limited and self.state is not None: self.state.rate_limited_count += 1 print(f"⚠ Prompt {prompt.id} rate limited, will retry later") + print(self._format_stats_line(result.execution_time, stats)) self._cleanup_rate_limit_artifacts(prompt) @@ -340,23 +350,27 @@ def _process_execution_result( ) if result.error: prompt.add_log(f"Error: {result.error}") + self._log_session_stats(prompt, stats) print( f"✗ Prompt {prompt.id} failed, will retry in " f"{self._generic_failure_retry_delay}s " f"({prompt.retry_count}/{'∞' if prompt.max_retries == -1 else prompt.max_retries})" ) + print(self._format_stats_line(result.execution_time, stats)) else: prompt.status = PromptStatus.FAILED prompt.clear_retry_backoff() # Fix 3: clear stale field for YAML cleanliness prompt.add_log(f"{execution_summary} - FAILED (max retries exceeded)") if result.error: prompt.add_log(f"Error: {result.error}") + self._log_session_stats(prompt, stats) self.state.failed_count += 1 retries_str = "∞" if prompt.max_retries == -1 else str(prompt.max_retries) print( f"✗ Prompt {prompt.id} failed permanently after {retries_str} attempts" ) + print(self._format_stats_line(result.execution_time, stats)) self.state.last_processed = datetime.now() @@ -487,6 +501,105 @@ def _format_duration(self, seconds: float) -> str: return f"{hours}h" return f"{hours}h {minutes}m" + def _extract_session_stats(self, prompt: QueuedPrompt) -> Optional[SessionStats]: + """Extract token usage from the JSONL conversation log for a just-finished execution. + + Locates the JSONL file using the same path-encoding logic as + _do_cleanup_rate_limit_artifacts(), then sums usage across all assistant + turns. + + Returns None if the JSONL cannot be found or parsed. + Best-effort: failures are logged but never propagate. 
+ + IMPORTANT: This method relies on Claude Code's internal file layout under + ~/.claude/projects/. See _do_cleanup_rate_limit_artifacts() for the same + caveat about undocumented internal structure. + """ + if not prompt.last_executed: + return None + + try: + return self._do_extract_session_stats(prompt) + except Exception as e: + prompt.add_log(f"Warning: session stats extraction failed: {e}") + return None + + def _do_extract_session_stats(self, prompt: QueuedPrompt) -> Optional[SessionStats]: + """Inner implementation — may raise; caller catches all exceptions.""" + cutoff = prompt.last_executed.timestamp() + claude_dir = Path.home() / ".claude" + + resolved = prompt._resolved_working_directory or str( + Path(prompt.working_directory).resolve() + ) + encoded = resolved.replace("/", "-") + jsonl_dir = claude_dir / "projects" / encoded + + if not jsonl_dir.is_dir(): + return None + + # Find the newest .jsonl file with mtime >= cutoff (no size cap). + best_file = None + best_mtime = 0.0 + for f in jsonl_dir.glob("*.jsonl"): + try: + st = f.stat() + if st.st_mtime >= cutoff and st.st_mtime > best_mtime: + best_mtime = st.st_mtime + best_file = f + except OSError: + pass + + if best_file is None: + return None + + # Sum usage across all assistant turns, line-by-line. 
+ stats = SessionStats() + with open(best_file, "r") as fh: + for line in fh: + try: + obj = json.loads(line) + except ValueError: + continue + if obj.get("type") != "assistant" or "message" not in obj: + continue + usage = obj["message"].get("usage", {}) + stats.input_tokens += usage.get("input_tokens", 0) + stats.output_tokens += usage.get("output_tokens", 0) + stats.cache_creation_input_tokens += usage.get("cache_creation_input_tokens", 0) + stats.cache_read_input_tokens += usage.get("cache_read_input_tokens", 0) + stats.api_turns += 1 + + if stats.api_turns == 0: + return None + + return stats + + def _format_stats_line( + self, execution_time: float, stats: Optional[SessionStats] + ) -> str: + """Format a stats line for console output after job completion.""" + parts = [f"Duration: {self._format_duration(execution_time)}"] + if stats is not None: + parts.append(f"Input: {stats.total_input_tokens:,} tokens") + parts.append(f"Output: {stats.output_tokens:,} tokens") + return " " + " | ".join(parts) + + def _log_session_stats( + self, prompt: QueuedPrompt, stats: Optional[SessionStats] + ) -> None: + """Log detailed token usage to the prompt's execution log (.md file).""" + if stats is None: + return + prompt.add_log( + f"Token usage: {stats.input_tokens:,} input" + f" + {stats.cache_creation_input_tokens:,} cache-write" + f" + {stats.cache_read_input_tokens:,} cache-read" + f" = {stats.total_input_tokens:,} total input," + f" {stats.output_tokens:,} output" + f" ({stats.api_turns} API turn{'s' if stats.api_turns != 1 else ''})" + ) + def add_prompt(self, prompt: QueuedPrompt) -> bool: """Add a prompt to the queue.""" try: diff --git a/tests/test_session_stats.py b/tests/test_session_stats.py new file mode 100644 index 0000000..54a78a9 --- /dev/null +++ b/tests/test_session_stats.py @@ -0,0 +1,563 @@ +""" +Tests for SessionStats dataclass and session stats extraction from JSONL logs. + +Test IDs use the SS- prefix for cross-reference. 
+""" + +import json +import os +from datetime import datetime, timedelta +from pathlib import Path +from unittest.mock import patch + +from claude_code_queue.models import ( + SessionStats, + QueuedPrompt, + PromptStatus, + ExecutionResult, + RateLimitInfo, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_assistant_line( + input_tokens=10, + output_tokens=20, + cache_creation=100, + cache_read=200, +): + """Build a single JSONL assistant line with the given usage values.""" + return json.dumps({ + "type": "assistant", + "message": { + "role": "assistant", + "content": [{"type": "text", "text": "hello"}], + "usage": { + "input_tokens": input_tokens, + "output_tokens": output_tokens, + "cache_creation_input_tokens": cache_creation, + "cache_read_input_tokens": cache_read, + }, + }, + }) + + +def _make_user_line(): + """Build a JSONL user line (should be ignored by stats extraction).""" + return json.dumps({ + "type": "user", + "message": {"role": "user", "content": "say hello"}, + }) + + +def _make_queue_op_line(): + """Build a JSONL queue-operation line (should be ignored).""" + return json.dumps({ + "type": "queue-operation", + "operation": "enqueue", + "timestamp": "2026-03-15T12:00:00.000Z", + }) + + +def _make_last_prompt_line(): + """Build a JSONL last-prompt line (should be ignored).""" + return json.dumps({ + "type": "last-prompt", + "lastPrompt": "say hello", + }) + + +def _write_jsonl(path, lines): + """Write JSONL lines to a file and return the path.""" + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + for line in lines: + f.write(line + "\n") + return path + + +def _setup_jsonl_for_prompt(tmp_path, prompt, lines): + """Create the JSONL file in the expected directory structure for a prompt. + + Returns the path to the JSONL file. 
+ """ + resolved = prompt._resolved_working_directory or str( + Path(prompt.working_directory).resolve() + ) + encoded = resolved.replace("/", "-") + jsonl_dir = tmp_path / ".claude" / "projects" / encoded + jsonl_file = jsonl_dir / "session-uuid.jsonl" + _write_jsonl(jsonl_file, lines) + return jsonl_file + + +def _make_stats_prompt(tmp_path): + """Create a QueuedPrompt wired to a working directory under tmp_path.""" + work_dir = tmp_path / "workdir" + work_dir.mkdir(exist_ok=True) + prompt = QueuedPrompt( + id="abc12345", + content="test", + working_directory=str(work_dir), + ) + prompt.last_executed = datetime.now() - timedelta(seconds=5) + prompt._resolved_working_directory = str(work_dir) + return prompt + + +# =========================================================================== +# SessionStats — basic properties +# =========================================================================== + + +def test_session_stats_defaults_are_zero(): # SS-001 + stats = SessionStats() + assert stats.input_tokens == 0 + assert stats.output_tokens == 0 + assert stats.cache_creation_input_tokens == 0 + assert stats.cache_read_input_tokens == 0 + assert stats.api_turns == 0 + + +def test_session_stats_total_input_sums_all_three(): # SS-002 + stats = SessionStats( + input_tokens=10, + cache_creation_input_tokens=100, + cache_read_input_tokens=200, + ) + assert stats.total_input_tokens == 310 + + +def test_session_stats_total_input_zero_when_all_zero(): # SS-003 + stats = SessionStats() + assert stats.total_input_tokens == 0 + + +# =========================================================================== +# _extract_session_stats() +# =========================================================================== + + +def test_extract_stats_single_turn(manager, tmp_path, mocker): # SS-010 + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + prompt = _make_stats_prompt(tmp_path) + jsonl_file = _setup_jsonl_for_prompt(tmp_path, prompt, [ + 
_make_user_line(), + _make_assistant_line(input_tokens=5, output_tokens=50, cache_creation=1000, cache_read=2000), + ]) + os.utime(jsonl_file, (datetime.now().timestamp(), datetime.now().timestamp())) + + stats = manager._extract_session_stats(prompt) + + assert stats is not None + assert stats.input_tokens == 5 + assert stats.output_tokens == 50 + assert stats.cache_creation_input_tokens == 1000 + assert stats.cache_read_input_tokens == 2000 + assert stats.total_input_tokens == 3005 + assert stats.api_turns == 1 + + +def test_extract_stats_multi_turn(manager, tmp_path, mocker): # SS-011 + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + prompt = _make_stats_prompt(tmp_path) + jsonl_file = _setup_jsonl_for_prompt(tmp_path, prompt, [ + _make_user_line(), + _make_assistant_line(input_tokens=3, output_tokens=100, cache_creation=5000, cache_read=8000), + _make_user_line(), + _make_assistant_line(input_tokens=1, output_tokens=200, cache_creation=5000, cache_read=8000), + _make_user_line(), + _make_assistant_line(input_tokens=1, output_tokens=150, cache_creation=0, cache_read=10000), + ]) + os.utime(jsonl_file, (datetime.now().timestamp(), datetime.now().timestamp())) + + stats = manager._extract_session_stats(prompt) + + assert stats is not None + assert stats.input_tokens == 5 + assert stats.output_tokens == 450 + assert stats.cache_creation_input_tokens == 10000 + assert stats.cache_read_input_tokens == 26000 + assert stats.total_input_tokens == 36005 + assert stats.api_turns == 3 + + +def test_extract_stats_non_assistant_lines_ignored(manager, tmp_path, mocker): # SS-012 + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + prompt = _make_stats_prompt(tmp_path) + jsonl_file = _setup_jsonl_for_prompt(tmp_path, prompt, [ + _make_queue_op_line(), + _make_user_line(), + _make_assistant_line(input_tokens=3, output_tokens=10, cache_creation=100, cache_read=200), + _make_last_prompt_line(), + ]) + 
os.utime(jsonl_file, (datetime.now().timestamp(), datetime.now().timestamp())) + + stats = manager._extract_session_stats(prompt) + + assert stats is not None + assert stats.input_tokens == 3 + assert stats.output_tokens == 10 + assert stats.api_turns == 1 + + +def test_extract_stats_missing_usage_block(manager, tmp_path, mocker): # SS-013 + """Assistant line without message.usage should contribute 0.""" + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + prompt = _make_stats_prompt(tmp_path) + line_no_usage = json.dumps({ + "type": "assistant", + "message": { + "role": "assistant", + "content": [{"type": "text", "text": "hi"}], + }, + }) + jsonl_file = _setup_jsonl_for_prompt(tmp_path, prompt, [ + line_no_usage, + _make_assistant_line(input_tokens=5, output_tokens=10, cache_creation=100, cache_read=200), + ]) + os.utime(jsonl_file, (datetime.now().timestamp(), datetime.now().timestamp())) + + stats = manager._extract_session_stats(prompt) + + assert stats is not None + assert stats.api_turns == 2 + assert stats.input_tokens == 5 + assert stats.output_tokens == 10 + + +def test_extract_stats_malformed_line_skipped(manager, tmp_path, mocker): # SS-014 + """Non-JSON lines should be skipped; valid lines still counted.""" + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + prompt = _make_stats_prompt(tmp_path) + jsonl_file = _setup_jsonl_for_prompt(tmp_path, prompt, [ + "this is not json", + _make_assistant_line(input_tokens=7, output_tokens=30, cache_creation=500, cache_read=600), + "{bad json", + ]) + os.utime(jsonl_file, (datetime.now().timestamp(), datetime.now().timestamp())) + + stats = manager._extract_session_stats(prompt) + + assert stats is not None + assert stats.input_tokens == 7 + assert stats.output_tokens == 30 + assert stats.api_turns == 1 + + +def test_extract_stats_old_mtime_returns_none(manager, tmp_path, mocker): # SS-015 + """JSONL file exists but mtime is before cutoff — returns 
None.""" + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + prompt = _make_stats_prompt(tmp_path) + jsonl_file = _setup_jsonl_for_prompt(tmp_path, prompt, [ + _make_assistant_line(), + ]) + old_time = (prompt.last_executed - timedelta(hours=1)).timestamp() + os.utime(jsonl_file, (old_time, old_time)) + + stats = manager._extract_session_stats(prompt) + assert stats is None + + +def test_extract_stats_empty_file(manager, tmp_path, mocker): # SS-016 + """Empty JSONL file — returns None (0 API turns).""" + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + prompt = _make_stats_prompt(tmp_path) + jsonl_file = _setup_jsonl_for_prompt(tmp_path, prompt, []) + os.utime(jsonl_file, (datetime.now().timestamp(), datetime.now().timestamp())) + + stats = manager._extract_session_stats(prompt) + assert stats is None + + +def test_extract_stats_directory_missing(manager, tmp_path, mocker): # SS-017 + """~/.claude/projects/<encoded>/ doesn't exist — returns None.""" + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + prompt = _make_stats_prompt(tmp_path) + + stats = manager._extract_session_stats(prompt) + assert stats is None + + +def test_extract_stats_resolved_dir_none_fallback(manager, tmp_path, mocker): # SS-018 + """When _resolved_working_directory is None, falls back to resolving working_directory.""" + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + prompt = _make_stats_prompt(tmp_path) + prompt._resolved_working_directory = None + resolved = str(Path(prompt.working_directory).resolve()) + encoded = resolved.replace("/", "-") + jsonl_dir = tmp_path / ".claude" / "projects" / encoded + jsonl_file = jsonl_dir / "session.jsonl" + _write_jsonl(jsonl_file, [ + _make_assistant_line(input_tokens=1, output_tokens=2, cache_creation=3, cache_read=4), + ]) + os.utime(jsonl_file, (datetime.now().timestamp(), datetime.now().timestamp())) + + stats =
manager._extract_session_stats(prompt) + + assert stats is not None + assert stats.total_input_tokens == 8 + + +def test_extract_stats_last_executed_none(manager, tmp_path): # SS-019 + """When last_executed is None, returns None immediately.""" + prompt = _make_stats_prompt(tmp_path) + prompt.last_executed = None + + stats = manager._extract_session_stats(prompt) + assert stats is None + + +def test_extract_stats_newest_file_selected(manager, tmp_path, mocker): # SS-020 + """When multiple JSONL files match, the newest one is used.""" + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + prompt = _make_stats_prompt(tmp_path) + resolved = prompt._resolved_working_directory + encoded = resolved.replace("/", "-") + jsonl_dir = tmp_path / ".claude" / "projects" / encoded + jsonl_dir.mkdir(parents=True, exist_ok=True) + + older = jsonl_dir / "old-session.jsonl" + _write_jsonl(older, [ + _make_assistant_line(input_tokens=999, output_tokens=999, cache_creation=0, cache_read=0), + ]) + old_time = datetime.now().timestamp() - 2 + os.utime(older, (old_time, old_time)) + + newer = jsonl_dir / "new-session.jsonl" + _write_jsonl(newer, [ + _make_assistant_line(input_tokens=1, output_tokens=2, cache_creation=3, cache_read=4), + ]) + new_time = datetime.now().timestamp() + os.utime(newer, (new_time, new_time)) + + stats = manager._extract_session_stats(prompt) + + assert stats is not None + assert stats.input_tokens == 1 + assert stats.output_tokens == 2 + + +def test_extract_stats_exception_returns_none(manager, tmp_path): # SS-021 + """Internal errors are caught and None is returned.""" + prompt = _make_stats_prompt(tmp_path) + with patch.object(manager, "_do_extract_session_stats", side_effect=OSError("boom")): + stats = manager._extract_session_stats(prompt) + assert stats is None + + +# =========================================================================== +# _format_stats_line() +# 
=========================================================================== + + +def test_format_stats_line_with_stats(manager): # SS-030 + stats = SessionStats( + input_tokens=100, + output_tokens=500, + cache_creation_input_tokens=10000, + cache_read_input_tokens=5000, + api_turns=3, + ) + line = manager._format_stats_line(154.0, stats) + assert "Duration: 2m" in line + assert "Input: 15,100 tokens" in line + assert "Output: 500 tokens" in line + assert line.startswith(" ") + + +def test_format_stats_line_without_stats(manager): # SS-031 + line = manager._format_stats_line(45.0, None) + assert "Duration: 45s" in line + assert "Input" not in line + assert "Output" not in line + assert line.startswith(" ") + + +def test_format_stats_line_pipe_separators(manager): # SS-032 + stats = SessionStats(input_tokens=1, output_tokens=2) + line = manager._format_stats_line(10.0, stats) + assert " | " in line + + +# =========================================================================== +# _log_session_stats() +# =========================================================================== + + +def test_log_session_stats_detailed_breakdown(manager): # SS-050 + prompt = QueuedPrompt(id="abc12345", content="test") + stats = SessionStats( + input_tokens=402, + output_tokens=51568, + cache_creation_input_tokens=19093602, + cache_read_input_tokens=4255901, + api_turns=297, + ) + + manager._log_session_stats(prompt, stats) + + assert "402 input" in prompt.execution_log + assert "19,093,602 cache-write" in prompt.execution_log + assert "4,255,901 cache-read" in prompt.execution_log + assert "23,349,905 total input" in prompt.execution_log + assert "51,568 output" in prompt.execution_log + assert "297 API turns" in prompt.execution_log + + +def test_log_session_stats_none_no_log(manager): # SS-051 + prompt = QueuedPrompt(id="abc12345", content="test") + manager._log_session_stats(prompt, None) + assert "Token usage" not in prompt.execution_log + + +def 
test_log_session_stats_single_turn_singular(manager): # SS-052 + prompt = QueuedPrompt(id="abc12345", content="test") + stats = SessionStats(input_tokens=1, output_tokens=2, api_turns=1) + manager._log_session_stats(prompt, stats) + assert "1 API turn)" in prompt.execution_log + + +# =========================================================================== +# Integration: stats printed in _process_execution_result() +# =========================================================================== + + +def test_result_success_prints_stats(manager, tmp_path, mocker, capsys): # SS-040 + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + manager.state = manager.storage.load_queue_state() + prompt = _make_stats_prompt(tmp_path) + prompt.status = PromptStatus.EXECUTING + manager.state.add_prompt(prompt) + _setup_jsonl_for_prompt(tmp_path, prompt, [ + _make_assistant_line(input_tokens=5, output_tokens=50, cache_creation=1000, cache_read=2000), + ]) + result = ExecutionResult(success=True, output="done", execution_time=120.5) + + manager._process_execution_result(prompt, result) + + captured = capsys.readouterr().out + assert "completed successfully" in captured + assert "Duration:" in captured + assert "Input: 3,005 tokens" in captured + assert "Output: 50 tokens" in captured + + +def test_result_success_no_jsonl_prints_duration_only(manager, tmp_path, mocker, capsys): # SS-041 + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + manager.state = manager.storage.load_queue_state() + prompt = _make_stats_prompt(tmp_path) + prompt.status = PromptStatus.EXECUTING + manager.state.add_prompt(prompt) + result = ExecutionResult(success=True, output="done", execution_time=30.0) + + manager._process_execution_result(prompt, result) + + captured = capsys.readouterr().out + assert "Duration: 30s" in captured + assert "Input" not in captured + + +def test_result_rate_limited_prints_stats_before_cleanup(manager, tmp_path, 
mocker, capsys): # SS-042 + """Stats must be extracted BEFORE cleanup deletes the JSONL.""" + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + manager.state = manager.storage.load_queue_state() + prompt = _make_stats_prompt(tmp_path) + prompt.status = PromptStatus.EXECUTING + manager.state.add_prompt(prompt) + _setup_jsonl_for_prompt(tmp_path, prompt, [ + _make_assistant_line(input_tokens=3, output_tokens=10, cache_creation=500, cache_read=600), + ]) + + rate_info = RateLimitInfo( + is_rate_limited=True, + limit_message="usage limit reached", + ) + result = ExecutionResult( + success=False, + output="", + error="rate limited", + rate_limit_info=rate_info, + execution_time=5.0, + ) + + manager._process_execution_result(prompt, result) + + captured = capsys.readouterr().out + assert "rate limited" in captured + assert "Input: 1,103 tokens" in captured + assert "Output: 10 tokens" in captured + + +def test_result_generic_failure_retry_prints_stats(manager, tmp_path, mocker, capsys): # SS-043 + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + manager.state = manager.storage.load_queue_state() + prompt = _make_stats_prompt(tmp_path) + prompt.status = PromptStatus.EXECUTING + manager.state.add_prompt(prompt) + _setup_jsonl_for_prompt(tmp_path, prompt, [ + _make_assistant_line(input_tokens=2, output_tokens=30, cache_creation=100, cache_read=200), + ]) + result = ExecutionResult( + success=False, output="", error="something broke", execution_time=10.0 + ) + + manager._process_execution_result(prompt, result) + + captured = capsys.readouterr().out + assert "failed" in captured + assert "Input: 302 tokens" in captured + assert "Output: 30 tokens" in captured + + +def test_result_generic_failure_permanent_prints_stats(manager, tmp_path, mocker, capsys): # SS-044 + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + manager.state = manager.storage.load_queue_state() + prompt = 
_make_stats_prompt(tmp_path) + prompt.status = PromptStatus.EXECUTING + prompt.max_retries = 1 + prompt.retry_count = 1 + manager.state.add_prompt(prompt) + _setup_jsonl_for_prompt(tmp_path, prompt, [ + _make_assistant_line(input_tokens=1, output_tokens=5, cache_creation=50, cache_read=100), + ]) + result = ExecutionResult( + success=False, output="", error="something broke", execution_time=8.0 + ) + + manager._process_execution_result(prompt, result) + + captured = capsys.readouterr().out + assert "failed permanently" in captured + assert "Input: 151 tokens" in captured + assert "Output: 5 tokens" in captured + + +def test_result_non_retryable_no_stats_printed(manager, tmp_path, mocker, capsys): # SS-045 + """Non-retryable errors should not print stats.""" + mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path) + manager.state = manager.storage.load_queue_state() + prompt = _make_stats_prompt(tmp_path) + prompt.status = PromptStatus.EXECUTING + manager.state.add_prompt(prompt) + _setup_jsonl_for_prompt(tmp_path, prompt, [ + _make_assistant_line(input_tokens=1, output_tokens=1, cache_creation=1, cache_read=1), + ]) + result = ExecutionResult( + success=False, + output="", + error="nested session", + execution_time=1.0, + is_non_retryable=True, + ) + + manager._process_execution_result(prompt, result) + + captured = capsys.readouterr().out + assert "non-retryable" in captured + assert "Input" not in captured + assert "Duration" not in captured From 9877331747f8e76248b59449acf1b8cefd9669ec Mon Sep 17 00:00:00 2001 From: Matthew Tibbits Date: Mon, 16 Mar 2026 02:44:14 +0000 Subject: [PATCH 3/6] feat: add `cleanup` CLI subcommand for backlog artifact purge Adds `claude-queue cleanup [--dry-run]` to remove rate-limit artifacts from ~/.claude/. Identifies rate-limited sessions by scanning debug transcripts for 'rate_limit_error', then deletes correlated JSONL, todo, and telemetry files by UUID. No claude binary needed (E3 pattern). 
Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Matthew Tibbits --- CLAUDE.md | 1 + src/claude_code_queue/cli.py | 100 +++++++++++++++++++++++++++++++ tests/test_cli.py | 112 +++++++++++++++++++++++++++++++++++ 3 files changed, 213 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index c154483..76dba7c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -181,5 +181,6 @@ retry_not_before: null | `test` | Verify claude CLI | Yes | | `bank save/list/use/delete` | Template bank ops | No | | `batch generate/validate/variables` | Batch job generation | No | +| `cleanup [--dry-run]` | Remove rate-limit artifacts from ~/.claude/ | No | | `install-skill [--force]` | Copy SKILL.md to ~/.claude/skills/ | No | | `prompt-box` | Launch Rust TUI | No (needs Rust binary) | diff --git a/src/claude_code_queue/cli.py b/src/claude_code_queue/cli.py index 489fe42..3420b91 100644 --- a/src/claude_code_queue/cli.py +++ b/src/claude_code_queue/cli.py @@ -12,6 +12,7 @@ import sys from datetime import datetime from pathlib import Path +from typing import List from .batch import ( extract_variables, @@ -241,6 +242,15 @@ def main(): "--force", action="store_true", help="Overwrite existing skill file" ) + # Cleanup subcommand + cleanup_parser = subparsers.add_parser( + "cleanup", help="Remove rate-limit artifacts from ~/.claude/" + ) + cleanup_parser.add_argument( + "--dry-run", action="store_true", + help="Report what would be deleted without acting", + ) + # Prompt box subcommand prompt_box_parser = subparsers.add_parser( "prompt-box", help="Launch the interactive prompt box CLI", add_help=False @@ -279,6 +289,8 @@ def main(): return cmd_batch(args) elif args.command == "install-skill": return cmd_install_skill(args) + elif args.command == "cleanup": + return cmd_cleanup(args) elif args.command == "prompt-box": return cmd_prompt_box(args) else: @@ -721,6 +733,94 @@ def cmd_install_skill(args) -> int: return 0 +def cmd_cleanup(args) -> int: + """Remove rate-limit artifacts from ~/.claude/. 
+ + Primary identification: scan debug transcripts for 'rate_limit_error' in + the content (authoritative signal). Then delete correlated JSONL, todo, + and telemetry files by UUID. + + This is the E3 pattern: no claude binary needed. + """ + claude_dir = Path.home() / ".claude" + dry_run = args.dry_run + matched = 0 + skipped = 0 + rate_limited_uuids: List[str] = [] + + # 1. Debug transcripts — primary identification via content grep. + # Read the full file (max ~90 KB for successful runs) since this is a + # one-time tool where correctness matters more than speed. + debug_dir = claude_dir / "debug" + if debug_dir.is_dir(): + for debug_file in debug_dir.glob("*.txt"): + try: + with open(debug_file, "r", errors="replace") as fh: + content = fh.read() + if "rate_limit_error" in content: + rate_limited_uuids.append(debug_file.stem) + if dry_run: + print(f" [dry-run] would delete {debug_file}") + else: + debug_file.unlink() + matched += 1 + except OSError: + skipped += 1 + + if rate_limited_uuids: + print(f"Identified {len(rate_limited_uuids)} rate-limited session(s)") + + # 2. JSONL conversation logs — by UUID correlation + projects_dir = claude_dir / "projects" + if projects_dir.is_dir(): + for session_uuid in rate_limited_uuids: + for jsonl_file in projects_dir.glob(f"*/{session_uuid}.jsonl"): + try: + if dry_run: + print(f" [dry-run] would delete {jsonl_file}") + else: + jsonl_file.unlink() + matched += 1 + except OSError: + skipped += 1 + + # 3. Todo stubs — by UUID correlation + 2-byte size guard + todos_dir = claude_dir / "todos" + if todos_dir.is_dir(): + for session_uuid in rate_limited_uuids: + todo_file = todos_dir / f"{session_uuid}-agent-{session_uuid}.json" + try: + st = todo_file.stat() + if st.st_size <= 2: + if dry_run: + print(f" [dry-run] would delete {todo_file}") + else: + todo_file.unlink() + matched += 1 + except OSError: + skipped += 1 + + # 4. 
class TestCleanup:
    """Tests for `claude-queue cleanup [--dry-run]`.

    Every test redirects ``Path.home()`` to ``tmp_path`` so the command only
    ever sees a throwaway ``.claude/`` tree.
    """

    def _make_artifacts(self, tmp_path, session_uuid="aaa-bbb-ccc"):
        """Create one fake rate-limit artifact of each kind under tmp_path/.claude/.

        Returns:
            (debug_file, jsonl_file, todo_file, telemetry_file) as Paths.
        """
        claude_dir = tmp_path / ".claude"
        debug_dir = claude_dir / "debug"
        projects_dir = claude_dir / "projects" / "-home-testuser-project"
        todos_dir = claude_dir / "todos"
        telemetry_dir = claude_dir / "telemetry"
        for d in (debug_dir, projects_dir, todos_dir, telemetry_dir):
            d.mkdir(parents=True)

        # Debug file with rate_limit_error content
        debug_file = debug_dir / f"{session_uuid}.txt"
        debug_file.write_text("startup\nrate_limit_error\n")

        # Correlated JSONL
        jsonl_file = projects_dir / f"{session_uuid}.jsonl"
        jsonl_file.write_bytes(b"x" * 5000)

        # Correlated todo stub
        todo_file = todos_dir / f"{session_uuid}-agent-{session_uuid}.json"
        todo_file.write_text("[]")

        # Correlated telemetry file
        telemetry_file = telemetry_dir / f"1p_failed_events.{session_uuid}.other-uuid.json"
        telemetry_file.write_text('{"events": []}')

        return debug_file, jsonl_file, todo_file, telemetry_file

    def _run_cleanup(self, tmp_path, *extra_args):
        """Run `claude-queue cleanup [extra_args]` through main() and return its exit code.

        Both sys.argv and Path.home() are patched for the duration of the call,
        mirroring the `_run*` helpers used by the other CLI test classes.
        """
        argv = ["claude-queue", "cleanup", *extra_args]
        with patch("sys.argv", argv), patch("pathlib.Path.home", return_value=tmp_path):
            return main()

    def test_cleanup_dry_run_does_not_delete(self, tmp_path, capsys):
        debug_file, jsonl_file, todo_file, telemetry_file = self._make_artifacts(tmp_path)

        code = self._run_cleanup(tmp_path, "--dry-run")

        assert code == 0
        assert debug_file.exists(), "dry-run must not delete files"
        assert jsonl_file.exists()
        assert todo_file.exists()
        assert telemetry_file.exists()
        out = capsys.readouterr().out
        assert "Would delete" in out
        assert "dry-run" in out

    def test_cleanup_deletes_artifacts(self, tmp_path, capsys):
        debug_file, jsonl_file, todo_file, telemetry_file = self._make_artifacts(tmp_path)

        code = self._run_cleanup(tmp_path)

        assert code == 0
        assert not debug_file.exists()
        assert not jsonl_file.exists()
        assert not todo_file.exists()
        assert not telemetry_file.exists()
        out = capsys.readouterr().out
        assert "Deleted 4 rate-limit artifact(s)" in out

    def test_cleanup_preserves_non_rate_limited_debug(self, tmp_path, capsys):
        """Debug files without rate_limit_error are not deleted."""
        claude_dir = tmp_path / ".claude"
        debug_dir = claude_dir / "debug"
        debug_dir.mkdir(parents=True)

        good_file = debug_dir / "good-session.txt"
        good_file.write_text("startup\nall good\nstream completed\n")

        code = self._run_cleanup(tmp_path)

        assert code == 0
        assert good_file.exists()
        assert "Deleted 0" in capsys.readouterr().out

    def test_cleanup_preserves_real_todo_file(self, tmp_path, capsys):
        """Todo files > 2 bytes are preserved even if UUID matches a rate-limited session."""
        debug_file, jsonl_file, todo_file, telemetry_file = self._make_artifacts(tmp_path)
        # Overwrite the stub with realistic todo content (> 2 bytes)
        todo_file.write_text('[{"task": "implement feature", "status": "in_progress"}]')

        code = self._run_cleanup(tmp_path)

        assert code == 0
        assert todo_file.exists(), "real todo file (> 2 bytes) must be preserved"
        assert not debug_file.exists()
        assert not jsonl_file.exists()
        # Telemetry is part of the expected count below, so check it explicitly
        # rather than relying on the summary line alone.
        assert not telemetry_file.exists()
        # 3 deleted: debug + jsonl + telemetry (todo preserved by size guard)
        assert "Deleted 3" in capsys.readouterr().out

    def test_cleanup_handles_empty_claude_dir(self, tmp_path, capsys):
        """Cleanup succeeds when ~/.claude/ has no artifact directories."""
        (tmp_path / ".claude").mkdir()

        code = self._run_cleanup(tmp_path)

        assert code == 0
        assert "Deleted 0" in capsys.readouterr().out
- models.py: add `model: Optional[str]` field to QueuedPrompt - storage.py: R7 type-safe coercion in parse, round-trip in write, include in templates and bank list output - claude_interface.py: inject `--model` flag before positional prompt - cli.py: add `--model/-m` to `add` subparser, display in `bank list` - CLAUDE.md: add model field to YAML schema, --model to CLI reference - 14 new tests across all 4 test files Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Matthew Tibbits --- CLAUDE.md | 3 +- src/claude_code_queue/claude_interface.py | 3 + src/claude_code_queue/cli.py | 6 ++ src/claude_code_queue/models.py | 1 + src/claude_code_queue/storage.py | 11 +++ tests/test_claude_interface.py | 36 ++++++++ tests/test_cli.py | 25 ++++++ tests/test_models.py | 17 ++++ tests/test_storage.py | 103 ++++++++++++++++++++++ 9 files changed, 204 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index c154483..b06fc1f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -157,6 +157,7 @@ working_directory: . # Execution CWD (resolved relative) context_files: [] # Files passed as @-references max_retries: 3 # Total attempts (1=no retry, -1=unlimited) estimated_tokens: null # Optional hint +model: null # Optional Claude model ID (e.g. claude-haiku-4-5-20251001) # Internal fields (managed by the queue, not user-edited): status: queued retry_count: 0 @@ -173,7 +174,7 @@ retry_not_before: null | Command | Purpose | Needs `claude` binary? 
| |---|---|---| | `start [--verbose] [--no-skip-permissions]` | Run queue loop | Yes | -| `add [-p priority]` | Quick-add prompt | No | +| `add [-p priority] [-m model]` | Quick-add prompt | No | | `template [-p priority]` | Create template .md | No | | `status [--json] [--detailed]` | Queue stats | No | | `list [--status ] [--json]` | List prompts | No | diff --git a/src/claude_code_queue/claude_interface.py b/src/claude_code_queue/claude_interface.py index 19d8b3f..ebaad5c 100644 --- a/src/claude_code_queue/claude_interface.py +++ b/src/claude_code_queue/claude_interface.py @@ -304,6 +304,9 @@ def execute_prompt(self, prompt: QueuedPrompt) -> ExecutionResult: if context_refs: full_prompt = f"{' '.join(context_refs)} {prompt.content}" + if prompt.model is not None: + cmd.extend(["--model", prompt.model]) + cmd.append(full_prompt) # E1 — Use cwd= instead of os.chdir() to set the subprocess working directory. diff --git a/src/claude_code_queue/cli.py b/src/claude_code_queue/cli.py index 489fe42..b83539d 100644 --- a/src/claude_code_queue/cli.py +++ b/src/claude_code_queue/cli.py @@ -139,6 +139,9 @@ def main(): add_parser.add_argument( "--estimated-tokens", "-t", type=int, help="Estimated token usage" ) + add_parser.add_argument( + "--model", "-m", default=None, help="Claude model ID (e.g. claude-haiku-4-5-20251001)" + ) template_parser = subparsers.add_parser( "template", help="Create a prompt template file" @@ -322,6 +325,7 @@ def cmd_add(args) -> int: context_files=args.context_files, max_retries=args.max_retries, estimated_tokens=args.estimated_tokens, + model=args.model, ) # Use _save_single_prompt directly rather than load_queue_state() + # save_queue_state(). 
Loading the full queue state just to append one file @@ -543,6 +547,8 @@ def cmd_bank_list(args) -> int: print(f" Working directory: {template['working_directory']}") if template['estimated_tokens']: print(f" Estimated tokens: {template['estimated_tokens']}") + if template.get('model'): + print(f" Model: {template['model']}") print(f" Modified: {template['modified'].strftime('%Y-%m-%d %H:%M:%S')}") print() diff --git a/src/claude_code_queue/models.py b/src/claude_code_queue/models.py index 2282715..e4817e1 100644 --- a/src/claude_code_queue/models.py +++ b/src/claude_code_queue/models.py @@ -35,6 +35,7 @@ class QueuedPrompt: status: PromptStatus = PromptStatus.QUEUED execution_log: str = "" estimated_tokens: Optional[int] = None + model: Optional[str] = None last_executed: Optional[datetime] = None rate_limited_at: Optional[datetime] = None reset_time: Optional[datetime] = None diff --git a/src/claude_code_queue/storage.py b/src/claude_code_queue/storage.py index 44ce89f..50839f7 100644 --- a/src/claude_code_queue/storage.py +++ b/src/claude_code_queue/storage.py @@ -107,6 +107,11 @@ def parse_prompt_file(file_path: Path) -> Optional[QueuedPrompt]: except (ValueError, TypeError): retry_count = 0 + # R7 — Type-safe coercion for model. YAML parses `model: true` as bool and + # `model: 42` as int; subprocess.Popen requires all cmd elements to be str. + _raw_model = metadata.get("model") + _model = str(_raw_model) if _raw_model is not None else None + prompt = QueuedPrompt( id=prompt_id, content=prompt_content, @@ -117,6 +122,7 @@ def parse_prompt_file(file_path: Path) -> Optional[QueuedPrompt]: max_retries=metadata.get("max_retries", 3), retry_count=retry_count, estimated_tokens=metadata.get("estimated_tokens"), + model=_model, # R5 — Restore created_at from YAML; fall back to filesystem ctime. # Using ctime alone causes created_at to drift when files are copied or # their timestamps change. The YAML value is the authoritative source. 
def test_execute_prompt_includes_model_flag_when_set(interface):  # CLI-060
    """A prompt carrying a model ID yields `--model <id>` in the spawned command."""
    fake_proc = make_mock_proc()
    queued = QueuedPrompt(content="task", model="claude-haiku-4-5-20251001")
    with patch("subprocess.Popen", return_value=fake_proc) as popen_spy:
        interface.execute_prompt(queued)
    # First positional argument of Popen(...) is the argv list.
    cmd = popen_spy.call_args[0][0]
    assert "--model" in cmd
    flag_pos = cmd.index("--model")
    assert cmd[flag_pos + 1] == "claude-haiku-4-5-20251001"


def test_execute_prompt_omits_model_flag_when_none(interface):  # CLI-061
    """A prompt without a model must not add any `--model` flag."""
    fake_proc = make_mock_proc()
    queued = QueuedPrompt(content="task", model=None)
    with patch("subprocess.Popen", return_value=fake_proc) as popen_spy:
        interface.execute_prompt(queued)
    cmd = popen_spy.call_args[0][0]
    assert "--model" not in cmd


def test_execute_prompt_model_flag_before_prompt_arg(interface):  # CLI-062
    """The `--model` flag has to come ahead of the positional prompt text."""
    fake_proc = make_mock_proc()
    queued = QueuedPrompt(content="my task", model="claude-opus-4-6")
    with patch("subprocess.Popen", return_value=fake_proc) as popen_spy:
        interface.execute_prompt(queued)
    cmd = popen_spy.call_args[0][0]
    flag_pos = cmd.index("--model")
    text_pos = cmd.index("my task")
    assert flag_pos < text_pos, (
        f"--model at {flag_pos} must precede prompt at {text_pos}"
    )
prompt.model == "claude-haiku-4-5-20251001" + + def test_add_model_short_flag(self): + _, storage = self._run_add("-m", "claude-sonnet-4-6") + prompt = storage._save_single_prompt.call_args[0][0] + assert prompt.model == "claude-sonnet-4-6" + + def test_add_default_model_none(self): + _, storage = self._run_add() + prompt = storage._save_single_prompt.call_args[0][0] + assert prompt.model is None + def test_add_returns_zero_on_success(self): code, _ = self._run_add(success=True) assert code == 0 @@ -704,6 +721,14 @@ def test_bank_list_omits_estimated_tokens_when_none(self, capsys): self._run_bank_list(templates=[_make_template(estimated_tokens=None)]) assert "Estimated tokens" not in capsys.readouterr().out + def test_bank_list_shows_model_when_set(self, capsys): + self._run_bank_list(templates=[_make_template(model="claude-haiku-4-5-20251001")]) + assert "claude-haiku-4-5-20251001" in capsys.readouterr().out + + def test_bank_list_omits_model_when_none(self, capsys): + self._run_bank_list(templates=[_make_template(model=None)]) + assert "Model:" not in capsys.readouterr().out + def test_bank_list_shows_modified_timestamp(self, capsys): mod = datetime(2026, 3, 1, 10, 30, 0) self._run_bank_list(templates=[_make_template(modified=mod)]) diff --git a/tests/test_models.py b/tests/test_models.py index eb5974a..5c83f75 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -49,6 +49,23 @@ def test_add_log_is_cumulative(): # MOD-003 assert "third" in p.execution_log +# =========================================================================== +# QueuedPrompt — model field +# =========================================================================== + + +def test_prompt_model_default_is_none(): # MOD-029 + """QueuedPrompt defaults model to None.""" + p = QueuedPrompt(content="test") + assert p.model is None + + +def test_prompt_model_accepts_string(): # MOD-030 + """model field stores an arbitrary string model ID.""" + p = QueuedPrompt(content="test", 
def test_parse_reads_model(tmp_path):  # STO-065
    """A string model ID in the frontmatter is parsed into prompt.model verbatim."""
    queue_storage = QueueStorage(str(tmp_path))
    prompt_path = queue_storage.queue_dir / "abc12345-task.md"
    document = (
        "---\npriority: 0\nworking_directory: .\nmax_retries: 3\n"
        "model: claude-haiku-4-5-20251001\n"
        "status: queued\nretry_count: 0\ncreated_at: 2025-01-01T00:00:00\n---\n\ncontent"
    )
    prompt_path.write_text(document)
    loaded = queue_storage.parser.parse_prompt_file(prompt_path)
    assert loaded is not None
    assert loaded.model == "claude-haiku-4-5-20251001"


def test_parse_model_null(tmp_path):  # STO-066
    """An explicit `model: null` in the frontmatter parses to None."""
    queue_storage = QueueStorage(str(tmp_path))
    prompt_path = queue_storage.queue_dir / "abc12345-task.md"
    document = (
        "---\npriority: 0\nworking_directory: .\nmax_retries: 3\n"
        "model: null\n"
        "status: queued\nretry_count: 0\ncreated_at: 2025-01-01T00:00:00\n---\n\ncontent"
    )
    prompt_path.write_text(document)
    loaded = queue_storage.parser.parse_prompt_file(prompt_path)
    assert loaded is not None
    assert loaded.model is None


def test_parse_model_coerces_bool_to_string(tmp_path):  # STO-067
    """YAML `model: true` is a bool; the parser must hand back the str 'True' (R7)."""
    queue_storage = QueueStorage(str(tmp_path))
    prompt_path = queue_storage.queue_dir / "abc12345-task.md"
    document = (
        "---\npriority: 0\nworking_directory: .\nmax_retries: 3\n"
        "model: true\n"
        "status: queued\nretry_count: 0\ncreated_at: 2025-01-01T00:00:00\n---\n\ncontent"
    )
    prompt_path.write_text(document)
    loaded = queue_storage.parser.parse_prompt_file(prompt_path)
    assert loaded is not None
    assert loaded.model == "True"


def test_parse_model_coerces_int_to_string(tmp_path):  # STO-068
    """YAML `model: 42` is an int; the parser must hand back the str '42' (R7)."""
    queue_storage = QueueStorage(str(tmp_path))
    prompt_path = queue_storage.queue_dir / "abc12345-task.md"
    document = (
        "---\npriority: 0\nworking_directory: .\nmax_retries: 3\n"
        "model: 42\n"
        "status: queued\nretry_count: 0\ncreated_at: 2025-01-01T00:00:00\n---\n\ncontent"
    )
    prompt_path.write_text(document)
    loaded = queue_storage.parser.parse_prompt_file(prompt_path)
    assert loaded is not None
    assert loaded.model == "42"


def test_model_roundtrip_write_then_parse(tmp_path):  # STO-069
    """A model ID written by write_prompt_file comes back unchanged from parse_prompt_file."""
    queue_storage = QueueStorage(str(tmp_path))
    prompt_path = queue_storage.queue_dir / "abc12345-task.md"
    original = QueuedPrompt(id="abc12345", content="task", model="claude-opus-4-6")
    queue_storage.parser.write_prompt_file(original, prompt_path)
    reloaded = queue_storage.parser.parse_prompt_file(prompt_path)
    assert reloaded is not None
    assert reloaded.model == "claude-opus-4-6"


def test_model_none_roundtrip_write_then_parse(tmp_path):  # STO-070
    """model=None is left out of the YAML on write and is still None after re-parsing."""
    queue_storage = QueueStorage(str(tmp_path))
    prompt_path = queue_storage.queue_dir / "abc12345-task.md"
    original = QueuedPrompt(id="abc12345", content="task", model=None)
    queue_storage.parser.write_prompt_file(original, prompt_path)
    reloaded = queue_storage.parser.parse_prompt_file(prompt_path)
    assert reloaded is not None
    assert reloaded.model is None


def test_create_prompt_template_includes_model_field(tmp_path):  # STO-071
    """The file produced by create_prompt_template() advertises `model: null`."""
    queue_storage = QueueStorage(str(tmp_path))
    template_path = queue_storage.create_prompt_template("my-task")
    assert "model: null" in template_path.read_text()


def test_save_prompt_to_bank_includes_model_field(tmp_path):  # STO-072
    """The file produced by save_prompt_to_bank() advertises `model: null`."""
    queue_storage = QueueStorage(str(tmp_path))
    bank_path = queue_storage.save_prompt_to_bank("my-template")
    assert "model: null" in bank_path.read_text()


def test_bank_list_includes_model_key(tmp_path):  # STO-073
    """Each dict returned by list_bank_templates() exposes a 'model' key."""
    queue_storage = QueueStorage(str(tmp_path))
    queue_storage.save_prompt_to_bank("my-template")
    listing = queue_storage.list_bank_templates()
    assert len(listing) == 1
    assert "model" in listing[0]
Walks the user through scoping, target + discovery, prompt design, variable extraction, CSV generation, priority + config, job generation, adversarial review, token efficiency, and launch. + Triggers on: "create a batch", "design queue jobs", "batch wizard", + "plan a batch run", "generate jobs for", "queue a bunch of", + "batch workflow". +allowed-tools: [Bash, Read, Glob, Grep, Write, Edit, Agent] +argument-hint: "[project path or short description of the work]" +disable-model-invocation: false +--- + +# Batch Job Wizard + +Guide the user through designing and generating a batch of claude-queue +jobs. Follow each phase in order. **Do NOT skip phases** unless the user +explicitly asks to. Ask for confirmation before advancing to the next +phase. + +**Flexible entry:** If the user already has a template, CSV, or partial +work, acknowledge what exists and pick up from the appropriate phase +rather than forcing them to restart. + +## Progress Tracker + +At the start of **every phase** (including Phase 1), print the progress +tracker below. Mark completed phases with `[x]`, the current phase with +`-->`, and future phases with `[ ]`. This gives the user a visual map of +where they are and what decisions are coming. + +``` +Batch Wizard Progress: + [x] 1. Scope + [x] 2. Target Discovery + --> 3. Prompt Design + [ ] 4. Template Variables + [ ] 5. CSV Generation + [ ] 6. Priority & Configuration + [ ] 7. Generate (Dry Run) + [ ] 8. Red Team Review + [ ] 9. Token Efficiency Review + [ ] 10. Review & Launch +``` + +When re-entering a phase (e.g., looping back from Phase 8 to revise the +template), mark the revisited phase with `-->` and keep earlier phases +as `[x]`. Phases after the current one revert to `[ ]` only if their +output is invalidated by the revision. + +--- + +## Phase 1: Scope + +Goal: Understand what the user wants to accomplish and where. + +- Ask: What project/directory are these jobs for? +- Ask: What is the goal? 
(refactor, review, documentation, tests, migration, etc.) +- Ask: Roughly how many targets do you expect? +- Explore the project briefly (read CLAUDE.md, scan directory structure) to + build context for later phases. + +Confirm scope before proceeding. + +--- + +## Phase 2: Target Discovery + +Goal: Build the concrete list of targets (files, functions, modules, etc.) +that each job will operate on. + +- Based on Phase 1, use Glob/Grep/Agent to enumerate candidates. +- Present the list to the user. Include the count. +- Ask: Should any targets be excluded? Are there any missing? +- Finalize the target list. + +--- + +## Phase 3: Prompt Design + +Goal: Craft a high-quality prompt for one representative target. + +- Pick a representative target (ideally one of medium complexity). +- Draft a complete prompt — title, context, step-by-step instructions, + expected output — as it would appear in a queue `.md` file body. +- Show it to the user for feedback. +- Iterate until they approve the prompt. + +**Tip:** Write the prompt as if addressing a capable colleague who has +never seen this codebase and has no conversation history. Be specific +about what to read, what to change, and what to verify. + +--- + +## Phase 4: Template Variables + +Goal: Parameterize the approved prompt so it works across all targets. + +- Identify which parts of the prompt vary per target (filenames, paths, + function names, module names, etc.). +- Replace them with `{{variable}}` placeholders. +- Show the user the parameterized template side-by-side with the original + to confirm nothing was lost. +- List the variables and their meanings. + +--- + +## Phase 5: CSV Generation + +Goal: Produce the data file that drives batch generation. + +- Generate a CSV (or TSV) with one row per target and columns matching + each `{{variable}}`. +- Show a preview of the first 5 and last 2 rows for confirmation. +- Report the total row count and verify it matches the Phase 2 target list. 
+ +--- + +## Phase 6: Priority & Configuration + +Goal: Set the YAML frontmatter values for the batch. + +- Ask about or recommend: + - `priority` / `--base-priority` / `--priority-step` — explain that + without `--base-priority`, all jobs get the same priority and + execution order becomes non-deterministic. + - `model` — whether a specific model is needed or the default suffices. + - `max_retries` — recommend `-1` (unlimited) for idempotent tasks, + `3` for tasks with side effects. + - `working_directory` — confirm the absolute path. + - `context_files` — any files every job should have loaded. +- Show the complete frontmatter block for approval. + +--- + +## Phase 7: Generate (Dry Run) + +Goal: Write the template and CSV, validate, and preview before committing. + +- Write the template to `~/.claude-queue/bank/.md` +- Write the CSV alongside it or to a temp location. +- Run: `claude-queue batch validate --data ` +- Run: `claude-queue batch generate --data --base-priority [--priority-step ] --dry-run` +- Show the dry-run output for review. +- Ask: Does everything look right? + +--- + +## Phase 8: Red Team Review + +Goal: Stress-test the prompt and batch design before committing real +compute time. Each job will run in a **clean context window** with no +memory of this conversation, so the prompt must stand completely on its +own. + +Walk through each of the following questions with the user. For each one, +explain *why* it matters — not just the question but the failure mode it +prevents. + +### 8a. Scope & Guardrails + +> Could Claude, starting from a blank context with only this prompt, +> wander off into a rabbit hole and never return useful output? + +Common failure modes: +- Vague verbs ("improve", "refactor", "clean up") without success criteria +- No explicit boundary on what files/directories to touch +- No instruction to stop and report rather than guess when uncertain + +If the answer is yes, suggest adding scoping guardrails to the prompt. 
+Acknowledge the tradeoff: tighter scope limits creativity, but +well-guided execution has a higher probability of producing useful output +across dozens of jobs. + +### 8b. Hidden Assumptions + +> Are there things you know about this project — conventions, gotchas, +> recent decisions, tribal knowledge — that the prompt doesn't mention? + +Think of it this way: if you handed this prompt to a competent intern on +their first day, what context would you need to give them verbally? +That context should be in the prompt. + +Examples: "we use tabs not spaces", "don't modify the generated files in +`build/`", "the `_legacy` suffix means do-not-touch", "PR titles must +follow conventional commits". + +### 8c. Parallelism & Dependencies + +> Can these jobs run in parallel, or does one job's output affect another? + +`claude-queue` currently executes one job at a time, but this may change +in the future. Even with serial execution, consider: +- Does job N modify a file that job N+1 also reads? (merge conflicts) +- Does job order matter? (e.g., creating an interface before implementing it) +- Should certain jobs be grouped at a higher priority to run first? + +If dependencies exist, discuss whether to split into separate batches +with different base priorities or add explicit ordering. + +### 8d. Idempotency + +> If a job runs twice (due to crash recovery), will the second run +> produce a broken result? + +`claude-queue` has at-least-once semantics. If the daemon crashes +mid-execution, the job will re-run. Flag any jobs that create resources, +send messages, open PRs, or make API calls — these need idempotency +guards in the prompt (e.g., "check if the PR already exists before +creating one"). + +After this review, offer to revise the template. If changes are made, +re-run the dry run from Phase 7 to confirm. + +--- + +## Phase 9: Token Efficiency Review + +Goal: Minimize wasted tokens across the batch without sacrificing quality. 
+Every inefficiency is multiplied by the number of jobs. + +Walk through these considerations with the user: + +### 9a. Context Files + +- Are the `context_files` in frontmatter actually needed by every job? + Files loaded via `context_files` consume input tokens on every run. +- Could some context be inlined in the prompt instead (a 3-line snippet + vs. loading a 500-line file)? +- Conversely, does the prompt ask Claude to "read file X" when it could + be a `context_file` instead (saving a tool-call round trip)? + +### 9b. Prompt Verbosity + +- Is the prompt longer than it needs to be? Look for: + - Repeated instructions (said two different ways) + - Excessive examples (one clear example beats three) + - Boilerplate that adds no information +- Every extra token in the prompt is multiplied by the job count. For a + batch of 100 jobs, trimming 500 tokens from the prompt saves 50,000 + input tokens. + +### 9c. Model Selection + +- Does every job need the most capable (and most expensive) model? +- Could a smaller/faster model handle straightforward tasks (e.g., + simple renames, formatting fixes) while reserving the larger model for + complex reasoning? +- Remind the user that `model:` can be set per-template in frontmatter. + +### 9d. Output Scope + +- Does the prompt constrain what Claude outputs? Without guidance, Claude + may produce verbose explanations, summaries, or commentary that consume + output tokens without adding value. +- Consider adding: "Do not explain your changes. Just make them." or + "Keep your response under 200 words" where appropriate. + +Summarize estimated token impact if changes are made (rough order of +magnitude is fine). Offer to revise the template. + +--- + +## Phase 10: Review & Launch + +Goal: Final review and optional queue start. + +- Run: `claude-queue batch generate --data --base-priority [--priority-step ]` +- Run: `claude-queue status --detailed` — show what will execute. 
+- Report: total job count, estimated run time (based on ~1-2 min/job for + typical prompts), priority ordering. +- Ask: Ready to start? Or do you want to review individual job files first? +- If the user says go: `claude-queue start` +- Remind the user they can monitor progress with `claude-queue status` + and cancel individual jobs with `claude-queue cancel `. From 22aa4ab9333b5c36f4badf59ea0d47dd4058009f Mon Sep 17 00:00:00 2001 From: Matthew Tibbits Date: Tue, 31 Mar 2026 05:10:53 +0000 Subject: [PATCH 6/6] fix: address red-team review feedback for batch-wizard skill - Move skill to src/claude_code_queue/skills/batch-wizard/ to match existing queue skill location and package data glob - Update install-skill CLI to discover and install all bundled skills, with optional skill_name argument for selective install - Remove invalid allowed-tools frontmatter field - Use explicit CSV path (~/.claude-queue/bank/) instead of vague location - Fix speculative language in Phase 8c parallelism section - Widen time estimate in Phase 10 to ~1-3 min/job Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Matthew Tibbits --- src/claude_code_queue/cli.py | 50 +++++++++++++------ .../skills}/batch-wizard/SKILL.md | 12 ++--- 2 files changed, 40 insertions(+), 22 deletions(-) rename {skills => src/claude_code_queue/skills}/batch-wizard/SKILL.md (96%) diff --git a/src/claude_code_queue/cli.py b/src/claude_code_queue/cli.py index 2c24b70..8a60331 100644 --- a/src/claude_code_queue/cli.py +++ b/src/claude_code_queue/cli.py @@ -239,10 +239,16 @@ def main(): # Install skill subcommand install_skill_parser = subparsers.add_parser( - "install-skill", help="Install the Claude Code skill to ~/.claude/skills/" + "install-skill", help="Install Claude Code skills to ~/.claude/skills/" ) install_skill_parser.add_argument( - "--force", action="store_true", help="Overwrite existing skill file" + "--force", action="store_true", help="Overwrite existing skill files" + ) + 
def cmd_install_skill(args) -> int:
    """Install bundled Claude Code skills into ~/.claude/skills/<name>/SKILL.md.

    A bundled skill is any sub-directory of the package's ``skills/`` folder
    that contains a ``SKILL.md`` file.

    Args:
        args: Parsed CLI namespace. Reads ``force`` (overwrite existing files)
            and, if present, ``skill_name`` (install only that skill; all
            bundled skills are installed when it is missing or None).

    Returns:
        0 when every requested skill was installed. 1 when no bundled skills
        can be found, the requested skill is unknown, or at least one skill
        was skipped because it already exists and ``force`` is not set.
    """
    skills_pkg_dir = Path(__file__).parent / "skills"

    # Path.iterdir() raises FileNotFoundError on a missing directory; a broken
    # installation should produce a readable error, not a traceback.
    available = (
        sorted(
            entry.name
            for entry in skills_pkg_dir.iterdir()
            if entry.is_dir() and (entry / "SKILL.md").is_file()
        )
        if skills_pkg_dir.is_dir()
        else []
    )
    if not available:
        print("Error: bundled SKILL.md not found in package installation.")
        return 1

    # Older callers build the namespace with only `force`; treat a missing
    # attribute the same as "install everything".
    requested = getattr(args, "skill_name", None)
    if requested:
        if requested not in available:
            print(f"Error: unknown skill '{requested}'. Available: {', '.join(available)}")
            return 1
        to_install = [requested]
    else:
        to_install = available

    installed = 0
    skipped = 0
    for name in to_install:
        skill_src = skills_pkg_dir / name / "SKILL.md"
        dest = Path.home() / ".claude" / "skills" / name / "SKILL.md"

        if dest.exists() and not args.force:
            print(f"  Skill '{name}' already installed at {dest} (use --force to overwrite)")
            skipped += 1
            continue

        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(skill_src.read_text(encoding="utf-8"), encoding="utf-8")
        print(f"  Installed '{name}' to {dest}")
        installed += 1

    # The restart hint matters whenever something new landed on disk, even if
    # another skill in the same run was skipped.
    if installed:
        print("Restart Claude Code for skills to become available.")
    return 1 if skipped else 0
- Run: `claude-queue batch generate <template> --data <csv> --base-priority <N> [--priority-step <M>]` - Run: `claude-queue status --detailed` — show what will execute. -- Report: total job count, estimated run time (based on ~1-2 min/job for - typical prompts), priority ordering. +- Report: total job count, estimated run time (based on ~1-3 min/job for + typical prompts, longer for complex multi-file tasks), priority ordering. - Ask: Ready to start? Or do you want to review individual job files first? - If the user says go: `claude-queue start` - Remind the user they can monitor progress with `claude-queue status`