From a26198e568ed4da9ec601a806020884cdaf0e143 Mon Sep 17 00:00:00 2001
From: Matthew Tibbits <mtibbits@users.noreply.github.com>
Date: Sun, 15 Mar 2026 13:24:44 +0000
Subject: [PATCH 1/6] test: add 23 unit tests for rate-limit artifact cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Covers _cleanup_rate_limit_artifacts() and _do_cleanup_rate_limit_artifacts()
from PR #32. Uses tmp_path-based fake ~/.claude/ directories with
Path.home() patched — no real filesystem state touched.

Tests (CLN-001 through CLN-023):
- Deletion of all four artifact types (JSONL, todo, debug, telemetry)
- Preservation of old, large, non-empty, and uncorrelated files
- Cleanup only triggered on rate-limit results (not success/failure)
- Missing directories, per-file OSError isolation, top-level exception safety
- Resolved working directory stashing, YAML non-persistence, fallback

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Matthew Tibbits <mtibbits@users.noreply.github.com>
---
 tests/test_cleanup.py | 597 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 597 insertions(+)
 create mode 100644 tests/test_cleanup.py

diff --git a/tests/test_cleanup.py b/tests/test_cleanup.py
new file mode 100644
index 0000000..6c02c26
--- /dev/null
+++ b/tests/test_cleanup.py
@@ -0,0 +1,597 @@
+"""
+Tests for rate-limit artifact cleanup in queue_manager.py.
+
+Covers _cleanup_rate_limit_artifacts() and _do_cleanup_rate_limit_artifacts().
+Uses tmp_path-based fake ~/.claude/ directories to avoid touching real state.
+"""
+
+import os
+import time
+from datetime import datetime, timedelta
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from claude_code_queue.models import (
+    ExecutionResult,
+    PromptStatus,
+    QueuedPrompt,
+    QueueState,
+    RateLimitInfo,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+SESSION_UUID = "00134021-1e30-4928-b9af-e92a676ab248"
+OTHER_UUID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
+FAKE_WORKING_DIR = "/home/testuser/project"
+
+
+def _make_claude_dirs(tmp_path, working_dir=FAKE_WORKING_DIR):
+    """Create the four artifact directories under a fake ~/.claude/.
+
+    Returns (claude_dir, jsonl_dir, todos_dir, debug_dir, telemetry_dir).
+    """
+    claude_dir = tmp_path / ".claude"
+    encoded = working_dir.replace("/", "-")
+    jsonl_dir = claude_dir / "projects" / encoded
+    todos_dir = claude_dir / "todos"
+    debug_dir = claude_dir / "debug"
+    telemetry_dir = claude_dir / "telemetry"
+    for d in (jsonl_dir, todos_dir, debug_dir, telemetry_dir):
+        d.mkdir(parents=True)
+    return claude_dir, jsonl_dir, todos_dir, debug_dir, telemetry_dir
+
+
+def _write_file(path, size_bytes=100, content=None):
+    """Write *content* as UTF-8 text if given, else *size_bytes* filler bytes."""
+    if content is not None:
+        path.write_text(content, encoding="utf-8")
+    else:
+        path.write_bytes(b"x" * size_bytes)
+
+
+def _make_prompt(working_dir=FAKE_WORKING_DIR):
+    """Create a prompt with last_executed set to 1 second ago and resolved working dir.
+
+    The 1-second offset avoids mtime-precision races: filesystem timestamps
+    can be coarser than the wall clock (e.g. 1-second granularity on ext3), so
+    a file written "now" may get an mtime at or before datetime.now().timestamp().
+    """
+    p = QueuedPrompt(
+        id="abc12345",
+        content="test task",
+        working_directory=working_dir,
+        status=PromptStatus.EXECUTING,
+    )
+    p.last_executed = datetime.now() - timedelta(seconds=1)
+    p._resolved_working_directory = str(Path(working_dir).resolve())
+    return p
+
+
+def _rate_limit_result() -> ExecutionResult:
+    return ExecutionResult(
+        success=False,
+        output="usage limit reached",
+        error="",
+        rate_limit_info=RateLimitInfo(is_rate_limited=True, reset_time=None),
+        execution_time=0.1,
+    )
+
+
+# ===========================================================================
+# Basic Cleanup — All Four Artifact Types  (CLN-001 through CLN-004)
+# ===========================================================================
+
+
+def test_cleanup_deletes_rate_limited_jsonl(tmp_path, manager):  # CLN-001
+    """A small, recent JSONL file is deleted; its UUID is used for correlated cleanup."""
+    claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path)
+    prompt = _make_prompt()
+
+    jsonl_file = jsonl_dir / f"{SESSION_UUID}.jsonl"
+    _write_file(jsonl_file, size_bytes=4000)  # 4 KB — rate-limited size
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert not jsonl_file.exists(), "Rate-limited JSONL file should be deleted"
+    assert "Cleaned up" in prompt.execution_log
+
+
+def test_cleanup_deletes_correlated_todo_stub(tmp_path, manager):  # CLN-002
+    """A 2-byte todo stub whose UUID matches the JSONL file is deleted."""
+    claude_dir, jsonl_dir, todos_dir, *_ = _make_claude_dirs(tmp_path)
+    prompt = _make_prompt()
+
+    _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000)
+    todo_file = todos_dir / f"{SESSION_UUID}-agent-{SESSION_UUID}.json"
+    _write_file(todo_file, content="[]")
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert not todo_file.exists(), "2-byte todo stub should be deleted"
+
+
+def test_cleanup_deletes_correlated_debug_file(tmp_path, manager):  # CLN-003
+    """A debug file whose UUID matches the JSONL file is deleted."""
+    claude_dir, jsonl_dir, todos_dir, debug_dir, _ = _make_claude_dirs(tmp_path)
+    prompt = _make_prompt()
+
+    _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000)
+    debug_file = debug_dir / f"{SESSION_UUID}.txt"
+    _write_file(debug_file, size_bytes=13000)
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert not debug_file.exists(), "Correlated debug file should be deleted"
+
+
+def test_cleanup_deletes_correlated_telemetry_file(tmp_path, manager):  # CLN-004
+    """A telemetry file whose session UUID matches the JSONL file is deleted."""
+    claude_dir, jsonl_dir, _, _, telemetry_dir = _make_claude_dirs(tmp_path)
+    prompt = _make_prompt()
+
+    _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000)
+    telemetry_file = telemetry_dir / f"1p_failed_events.{SESSION_UUID}.{OTHER_UUID}.json"
+    _write_file(telemetry_file, size_bytes=30000)
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert not telemetry_file.exists(), "Correlated telemetry file should be deleted"
+
+
+# ===========================================================================
+# Preservation — Files That Must NOT Be Deleted  (CLN-005 through CLN-009)
+# ===========================================================================
+
+
+def test_cleanup_preserves_old_jsonl(tmp_path, manager):  # CLN-005
+    """JSONL files older than last_executed are not deleted."""
+    claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path)
+
+    old_jsonl = jsonl_dir / f"{SESSION_UUID}.jsonl"
+    _write_file(old_jsonl, size_bytes=4000)
+    # Set mtime to 1 hour ago
+    old_time = time.time() - 3600
+    os.utime(old_jsonl, (old_time, old_time))
+
+    prompt = _make_prompt()
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert old_jsonl.exists(), "Old JSONL file must be preserved"
+
+
+def test_cleanup_preserves_large_jsonl(tmp_path, manager):  # CLN-006
+    """JSONL files >= 10 KB (successful runs) are not deleted even if recent."""
+    claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path)
+    prompt = _make_prompt()
+
+    large_jsonl = jsonl_dir / f"{SESSION_UUID}.jsonl"
+    _write_file(large_jsonl, size_bytes=150_000)  # 150 KB — successful run
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert large_jsonl.exists(), "Large JSONL file (successful run) must be preserved"
+
+
+def test_cleanup_preserves_non_empty_todo(tmp_path, manager):  # CLN-007
+    """Todo files > 2 bytes are not deleted even when UUID-correlated."""
+    claude_dir, jsonl_dir, todos_dir, *_ = _make_claude_dirs(tmp_path)
+    prompt = _make_prompt()
+
+    _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000)
+    real_todo = todos_dir / f"{SESSION_UUID}-agent-{SESSION_UUID}.json"
+    _write_file(real_todo, size_bytes=800)  # legitimate todo
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert real_todo.exists(), "Non-empty todo file must be preserved"
+
+
+def test_cleanup_preserves_old_debug_file(tmp_path, manager):  # CLN-008
+    """Debug files older than last_executed are not deleted even with UUID match."""
+    claude_dir, jsonl_dir, _, debug_dir, _ = _make_claude_dirs(tmp_path)
+
+    # Create JSONL file with current timestamp
+    _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000)
+
+    # Create debug file with old timestamp
+    debug_file = debug_dir / f"{SESSION_UUID}.txt"
+    _write_file(debug_file, size_bytes=13000)
+    old_time = time.time() - 3600
+    os.utime(debug_file, (old_time, old_time))
+
+    prompt = _make_prompt()
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert debug_file.exists(), "Old debug file must be preserved (timestamp guard)"
+
+
+def test_cleanup_does_not_delete_debug_without_jsonl_match(tmp_path, manager):  # CLN-009
+    """Debug files are only deleted when their UUID matches a rate-limited JSONL file.
+
+    If no JSONL file matches (e.g. it's >= 10 KB), the debug file is untouched.
+    """
+    claude_dir, jsonl_dir, _, debug_dir, _ = _make_claude_dirs(tmp_path)
+    prompt = _make_prompt()
+
+    # JSONL file is large (successful run) — no UUID collected
+    _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=150_000)
+
+    # Debug file exists for same UUID — must NOT be deleted
+    debug_file = debug_dir / f"{SESSION_UUID}.txt"
+    _write_file(debug_file, size_bytes=13000)
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert debug_file.exists(), "Debug file must not be deleted without JSONL UUID match"
+
+
+# ===========================================================================
+# Cleanup Triggered Only on Rate-Limit Results  (CLN-010 through CLN-012)
+# ===========================================================================
+
+
+def test_cleanup_not_called_on_success(tmp_path, manager, mocker):  # CLN-010
+    """Successful execution does not trigger artifact cleanup."""
+    prompt = QueuedPrompt(content="task")
+    manager.state = manager.storage.load_queue_state()
+    manager.state.add_prompt(prompt)
+
+    spy = mocker.patch.object(manager, "_cleanup_rate_limit_artifacts")
+
+    success = ExecutionResult(success=True, output="done", error="", execution_time=0.1)
+    mocker.patch.object(manager.claude_interface, "execute_prompt", return_value=success)
+    manager._execute_prompt(prompt)
+
+    spy.assert_not_called()
+
+
+def test_cleanup_not_called_on_generic_failure(tmp_path, manager, mocker):  # CLN-011
+    """Generic failure does not trigger artifact cleanup."""
+    prompt = QueuedPrompt(content="task", max_retries=3)
+    manager.state = manager.storage.load_queue_state()
+    manager.state.add_prompt(prompt)
+
+    spy = mocker.patch.object(manager, "_cleanup_rate_limit_artifacts")
+
+    fail = ExecutionResult(success=False, output="", error="oops", execution_time=0.1)
+    mocker.patch.object(manager.claude_interface, "execute_prompt", return_value=fail)
+    manager._execute_prompt(prompt)
+
+    spy.assert_not_called()
+
+
+def test_cleanup_called_on_rate_limit(tmp_path, manager, mocker):  # CLN-012
+    """Rate-limited execution triggers artifact cleanup."""
+    prompt = QueuedPrompt(content="task", max_retries=3)
+    manager.state = manager.storage.load_queue_state()
+    manager.state.add_prompt(prompt)
+
+    spy = mocker.patch.object(manager, "_cleanup_rate_limit_artifacts")
+
+    mocker.patch.object(
+        manager.claude_interface, "execute_prompt", return_value=_rate_limit_result()
+    )
+    manager._execute_prompt(prompt)
+
+    spy.assert_called_once_with(prompt)
+
+
+# ===========================================================================
+# Missing Directories  (CLN-013)
+# ===========================================================================
+
+
+def test_cleanup_handles_missing_directories(tmp_path, manager):  # CLN-013
+    """Cleanup does not crash when artifact directories don't exist."""
+    # Point home at tmp_path which has no .claude/ at all
+    prompt = _make_prompt()
+    prompt.last_executed = datetime.now()
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        # Should not raise
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    # No log entry since nothing was deleted
+    assert "Cleaned up" not in prompt.execution_log
+
+
+# ===========================================================================
+# Per-File Exception Handling  (CLN-014)
+# ===========================================================================
+
+
+def test_cleanup_continues_after_oserror_on_one_file(tmp_path, manager, mocker):  # CLN-014
+    """If stat() raises OSError on the debug file, the todo file is still deleted."""
+    _, jsonl_dir, todos_dir, debug_dir, _ = _make_claude_dirs(tmp_path)
+    prompt = _make_prompt()
+
+    # Create one small JSONL
+    _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000)
+
+    # Create debug file normally
+    debug_file = debug_dir / f"{SESSION_UUID}.txt"
+    _write_file(debug_file, size_bytes=13000)
+
+    # Create todo stub — should still be cleaned up despite debug failure
+    todo_file = todos_dir / f"{SESSION_UUID}-agent-{SESSION_UUID}.json"
+    _write_file(todo_file, content="[]")
+
+    # Patch Path.stat to raise OSError only for the debug file
+    original_stat = Path.stat
+
+    def selective_stat(self, *args, **kwargs):
+        if str(self) == str(debug_file):
+            raise OSError("Permission denied")
+        return original_stat(self, *args, **kwargs)
+
+    mocker.patch.object(Path, "stat", selective_stat)
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    # Stop all mocks before checking file existence (exists() calls stat())
+    mocker.stopall()
+    assert not todo_file.exists(), "Todo file should still be deleted despite debug OSError"
+    assert debug_file.exists(), "Debug file should survive (stat raised OSError)"
+
+
+# ===========================================================================
+# Top-Level Exception Safety  (CLN-015)
+# ===========================================================================
+
+
+def test_cleanup_exception_does_not_break_result_processing(tmp_path, manager, mocker):  # CLN-015
+    """If the entire cleanup throws, _process_execution_result() still completes
+    and the prompt's RATE_LIMITED status is persisted.
+    """
+    prompt = QueuedPrompt(content="task", max_retries=3)
+    manager.state = QueueState()
+    manager.state.add_prompt(prompt)
+
+    # Make cleanup explode
+    mocker.patch.object(
+        manager, "_do_cleanup_rate_limit_artifacts",
+        side_effect=RuntimeError("disk on fire")
+    )
+
+    rl_result = _rate_limit_result()
+    prompt.status = PromptStatus.EXECUTING
+    prompt.last_executed = datetime.now()
+    manager._process_execution_result(prompt, rl_result)
+
+    assert prompt.status == PromptStatus.RATE_LIMITED, (
+        "Prompt must reach RATE_LIMITED status even when cleanup throws"
+    )
+    assert manager.state.last_processed is not None, (
+        "last_processed must be set even when cleanup throws"
+    )
+    assert "artifact cleanup failed" in prompt.execution_log
+
+
+# ===========================================================================
+# No last_executed Guard  (CLN-016)
+# ===========================================================================
+
+
+def test_cleanup_noop_without_last_executed(manager):  # CLN-016
+    """Cleanup is a no-op when prompt.last_executed is None."""
+    prompt = QueuedPrompt(content="task")
+    prompt.last_executed = None
+
+    manager.state = QueueState()
+    manager.state.add_prompt(prompt)
+
+    # Should not raise and should not log
+    manager._cleanup_rate_limit_artifacts(prompt)
+    assert "Cleaned up" not in prompt.execution_log
+
+
+# ===========================================================================
+# Resolved Working Directory  (CLN-017, CLN-018)
+# ===========================================================================
+
+
+def test_execute_prompt_stashes_resolved_working_directory(manager, mocker):  # CLN-017
+    """_execute_prompt() sets _resolved_working_directory on the prompt."""
+    prompt = QueuedPrompt(content="task", working_directory="/some/path")
+    manager.state = manager.storage.load_queue_state()
+    manager.state.add_prompt(prompt)
+
+    mocker.patch.object(
+        manager.claude_interface, "execute_prompt",
+        return_value=ExecutionResult(success=True, output="ok", error="", execution_time=0.1),
+    )
+    manager._execute_prompt(prompt)
+
+    assert prompt._resolved_working_directory is not None
+    assert prompt._resolved_working_directory == str(Path("/some/path").resolve())
+
+
+def test_cleanup_uses_resolved_working_directory(tmp_path, manager):  # CLN-018
+    """Cleanup uses _resolved_working_directory (not re-resolving working_directory)."""
+    # Set up dirs for the resolved path, not the relative one
+    claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path, working_dir=FAKE_WORKING_DIR)
+
+    prompt = QueuedPrompt(
+        content="task",
+        working_directory=".",  # relative — would resolve to CWD
+        status=PromptStatus.EXECUTING,
+    )
+    prompt.last_executed = datetime.now() - timedelta(seconds=1)
+    prompt._resolved_working_directory = FAKE_WORKING_DIR  # stashed at execution time
+
+    jsonl_file = jsonl_dir / f"{SESSION_UUID}.jsonl"
+    _write_file(jsonl_file, size_bytes=4000)
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert not jsonl_file.exists(), (
+        "Cleanup must use _resolved_working_directory, not re-resolve '.'"
+    )
+
+
+# ===========================================================================
+# Deleted Count and Logging  (CLN-019, CLN-020)
+# ===========================================================================
+
+
+def test_cleanup_counts_all_deleted_artifacts(tmp_path, manager, capsys):  # CLN-019
+    """The deleted count includes all four artifact types."""
+    claude_dir, jsonl_dir, todos_dir, debug_dir, telemetry_dir = _make_claude_dirs(tmp_path)
+    prompt = _make_prompt()
+
+    _write_file(jsonl_dir / f"{SESSION_UUID}.jsonl", size_bytes=4000)
+    _write_file(todos_dir / f"{SESSION_UUID}-agent-{SESSION_UUID}.json", content="[]")
+    _write_file(debug_dir / f"{SESSION_UUID}.txt", size_bytes=13000)
+    _write_file(telemetry_dir / f"1p_failed_events.{SESSION_UUID}.{OTHER_UUID}.json", size_bytes=30000)
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert "Cleaned up 4 rate-limit artifact(s)" in prompt.execution_log
+    captured = capsys.readouterr()
+    assert "[cleanup] Removed 4 rate-limit artifact(s)" in captured.out
+
+
+def test_cleanup_no_log_when_nothing_deleted(tmp_path, manager, capsys):  # CLN-020
+    """No log entry or print when zero files are deleted."""
+    _make_claude_dirs(tmp_path)
+    prompt = _make_prompt()
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert "Cleaned up" not in prompt.execution_log
+    captured = capsys.readouterr()
+    assert "[cleanup]" not in captured.out
+
+
+# ===========================================================================
+# JSONL Early Break  (CLN-021)
+# ===========================================================================
+
+
+def test_cleanup_breaks_after_first_jsonl_match(tmp_path, manager):  # CLN-021
+    """Only one JSONL file is deleted per cleanup (one subprocess = one UUID).
+
+    Even with multiple small recent JSONL files, only the first match is deleted.
+    """
+    uuid2 = "99999999-aaaa-bbbb-cccc-dddddddddddd"
+    claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path)
+    prompt = _make_prompt()
+
+    f1 = jsonl_dir / f"{SESSION_UUID}.jsonl"
+    f2 = jsonl_dir / f"{uuid2}.jsonl"
+    _write_file(f1, size_bytes=4000)
+    _write_file(f2, size_bytes=4000)
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    # Exactly one should be deleted (we don't know which due to glob ordering)
+    remaining = list(jsonl_dir.glob("*.jsonl"))
+    assert len(remaining) == 1, (
+        f"Expected exactly 1 JSONL file remaining after cleanup, got {len(remaining)}"
+    )
+
+
+# ===========================================================================
+# _resolved_working_directory Field  (CLN-022)
+# ===========================================================================
+
+
+def test_resolved_working_directory_not_persisted_to_yaml(tmp_path, manager):  # CLN-022
+    """_resolved_working_directory is transient and not written to YAML frontmatter."""
+    prompt = QueuedPrompt(content="task", working_directory="/some/path")
+    prompt._resolved_working_directory = "/some/path"
+    prompt.last_executed = datetime.now()
+
+    manager.state = manager.storage.load_queue_state()
+    manager.state.add_prompt(prompt)
+    manager.storage.save_queue_state(manager.state)
+
+    # Read the file back and check YAML doesn't contain the field
+    queue_files = list(manager.storage.queue_dir.glob("*.md"))
+    assert len(queue_files) == 1
+    content = queue_files[0].read_text()
+    assert "_resolved_working_directory" not in content
+
+    # Reload and verify it's None (not persisted)
+    reloaded = manager.storage.load_queue_state()
+    reloaded_prompt = reloaded.prompts[0]
+    assert reloaded_prompt._resolved_working_directory is None
+
+
+# ===========================================================================
+# Fallback When _resolved_working_directory Is None  (CLN-023)
+# ===========================================================================
+
+
+def test_cleanup_falls_back_to_resolve_when_stash_missing(tmp_path, manager):  # CLN-023
+    """If _resolved_working_directory is None, cleanup resolves working_directory directly."""
+    claude_dir, jsonl_dir, *_ = _make_claude_dirs(tmp_path, working_dir=FAKE_WORKING_DIR)
+    prompt = QueuedPrompt(
+        content="task",
+        working_directory=FAKE_WORKING_DIR,
+        status=PromptStatus.EXECUTING,
+    )
+    prompt.last_executed = datetime.now() - timedelta(seconds=1)
+    prompt._resolved_working_directory = None  # simulate missing stash
+
+    jsonl_file = jsonl_dir / f"{SESSION_UUID}.jsonl"
+    _write_file(jsonl_file, size_bytes=4000)
+
+    with patch("pathlib.Path.home", return_value=tmp_path):
+        manager.state = QueueState()
+        manager.state.add_prompt(prompt)
+        manager._cleanup_rate_limit_artifacts(prompt)
+
+    assert not jsonl_file.exists(), "Cleanup should fall back to resolving working_directory"

From 69cb1875e19e672524cb01b83556cacf17828fd5 Mon Sep 17 00:00:00 2001
From: Matthew Tibbits <mtibbits@users.noreply.github.com>
Date: Mon, 16 Mar 2026 02:26:19 +0000
Subject: [PATCH 2/6] feat: display token usage and duration after each job
 execution

After each job completes during `claude-queue start`, print duration and
token usage (input + output) extracted from Claude Code's JSONL conversation
logs. Detailed cache breakdowns are persisted in the prompt's execution log.

Uses the existing JSONL files under ~/.claude/projects/ rather than switching
to --output-format json, preserving text-mode stdout, Fix 2 rate-limit
detection, and all existing tests unchanged.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Matthew Tibbits <mtibbits@users.noreply.github.com>
---
 src/claude_code_queue/models.py        |  16 +
 src/claude_code_queue/queue_manager.py | 115 ++++-
 tests/test_session_stats.py            | 563 +++++++++++++++++++++++++
 3 files changed, 693 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_session_stats.py

diff --git a/src/claude_code_queue/models.py b/src/claude_code_queue/models.py
index 2282715..a832229 100644
--- a/src/claude_code_queue/models.py
+++ b/src/claude_code_queue/models.py
@@ -245,6 +245,22 @@ def get_stats(self) -> Dict[str, Any]:
         }
 
 
+@dataclass
+class SessionStats:
+    """Token usage statistics extracted from a session's JSONL log."""
+
+    input_tokens: int = 0
+    output_tokens: int = 0
+    cache_creation_input_tokens: int = 0
+    cache_read_input_tokens: int = 0
+    api_turns: int = 0
+
+    @property
+    def total_input_tokens(self) -> int:
+        """Total tokens billed as input (non-cached + cache-write + cache-read)."""
+        return self.input_tokens + self.cache_creation_input_tokens + self.cache_read_input_tokens
+
+
 @dataclass
 class ExecutionResult:
     """Result of executing a prompt."""
diff --git a/src/claude_code_queue/queue_manager.py b/src/claude_code_queue/queue_manager.py
index 1a91e55..3f44185 100644
--- a/src/claude_code_queue/queue_manager.py
+++ b/src/claude_code_queue/queue_manager.py
@@ -2,6 +2,7 @@
 Queue manager with execution loop.
 """
 
+import json
 import os
 import sys
 import time
@@ -10,7 +11,7 @@
 from pathlib import Path
 from typing import List, Optional, Callable, Dict, Any
 
-from .models import QueuedPrompt, QueueState, PromptStatus, ExecutionResult
+from .models import QueuedPrompt, QueueState, PromptStatus, ExecutionResult, SessionStats
 from .storage import QueueStorage
 from .claude_interface import ClaudeCodeInterface
 
@@ -266,15 +267,22 @@ def _process_execution_result(
         """Process the result of prompt execution."""
         execution_summary = f"Execution completed in {result.execution_time:.1f}s"
 
+        # Extract token usage from the JSONL conversation log BEFORE any branch
+        # logic runs.  CRITICAL: this must happen before _cleanup_rate_limit_artifacts()
+        # which deletes the JSONL file on the rate-limited path.
+        stats = self._extract_session_stats(prompt)
+
         if result.success:
             # retry_not_before is already None — cleared by _execute_prompt() via clear_retry_backoff().
             prompt.status = PromptStatus.COMPLETED
             prompt.add_log(f"{execution_summary} - SUCCESS")
             if result.output:
                 prompt.add_log(f"Output:\n{result.output}")
+            self._log_session_stats(prompt, stats)
 
             self.state.total_processed += 1
             print(f"✓ Prompt {prompt.id} completed successfully")
+            print(self._format_stats_line(result.execution_time, stats))
 
         elif result.is_non_retryable:
             # Fix B — Non-retryable error: fail immediately, skip retry counter and can_retry().
@@ -317,10 +325,12 @@ def _process_execution_result(
                     else ""
                 )
                 prompt.add_log(f"Message{source_tag}: {result.rate_limit_info.limit_message}")
+            self._log_session_stats(prompt, stats)
 
             if not was_already_rate_limited and self.state is not None:
                 self.state.rate_limited_count += 1
             print(f"⚠ Prompt {prompt.id} rate limited, will retry later")
+            print(self._format_stats_line(result.execution_time, stats))
 
             self._cleanup_rate_limit_artifacts(prompt)
 
@@ -340,23 +350,27 @@ def _process_execution_result(
                 )
                 if result.error:
                     prompt.add_log(f"Error: {result.error}")
+                self._log_session_stats(prompt, stats)
                 print(
                     f"✗ Prompt {prompt.id} failed, will retry in "
                     f"{self._generic_failure_retry_delay}s "
                     f"({prompt.retry_count}/{'∞' if prompt.max_retries == -1 else prompt.max_retries})"
                 )
+                print(self._format_stats_line(result.execution_time, stats))
             else:
                 prompt.status = PromptStatus.FAILED
                 prompt.clear_retry_backoff()    # Fix 3: clear stale field for YAML cleanliness
                 prompt.add_log(f"{execution_summary} - FAILED (max retries exceeded)")
                 if result.error:
                     prompt.add_log(f"Error: {result.error}")
+                self._log_session_stats(prompt, stats)
 
                 self.state.failed_count += 1
                 retries_str = "∞" if prompt.max_retries == -1 else str(prompt.max_retries)
                 print(
                     f"✗ Prompt {prompt.id} failed permanently after {retries_str} attempts"
                 )
+                print(self._format_stats_line(result.execution_time, stats))
 
         self.state.last_processed = datetime.now()
 
@@ -487,6 +501,105 @@ def _format_duration(self, seconds: float) -> str:
                 return f"{hours}h"
             return f"{hours}h {minutes}m"
 
+    def _extract_session_stats(self, prompt: QueuedPrompt) -> Optional[SessionStats]:
+        """Best-effort token accounting for the execution that just finished.
+
+        Delegates to _do_extract_session_stats(), which locates the prompt's JSONL
+        conversation log under ~/.claude/projects/ and totals the usage reported
+        on its assistant turns.
+
+        Returns the collected SessionStats, or None when the prompt has no
+        last_executed timestamp, when the delegate finds nothing, or when the
+        delegate raises (a warning is then appended to the prompt's log).
+
+        NOTE: the lookup relies on Claude Code's undocumented on-disk layout, the
+        same caveat that applies to _do_cleanup_rate_limit_artifacts().
+        """
+        collected: Optional[SessionStats] = None
+        if prompt.last_executed:
+            try:
+                collected = self._do_extract_session_stats(prompt)
+            except Exception as exc:
+                # Stats are informational only; a failure here must not propagate.
+                prompt.add_log(f"Warning: session stats extraction failed: {exc}")
+        return collected
+
+    def _do_extract_session_stats(self, prompt: QueuedPrompt) -> Optional[SessionStats]:
+        """Sum token usage from the newest JSONL log touched since the prompt last ran.
+
+        Searches ~/.claude/projects/<resolved cwd with "/" replaced by "-"> for the
+        newest *.jsonl with mtime >= prompt.last_executed and totals message.usage over
+        every "assistant" record.  Malformed records are skipped; null usage counts as 0.
+
+        Returns None if the directory, a fresh file, or any assistant turn is
+        missing.  May raise (e.g. OSError on open); the caller catches everything.
+        """
+        cutoff = prompt.last_executed.timestamp()
+        resolved = prompt._resolved_working_directory or str(
+            Path(prompt.working_directory).resolve()
+        )
+        jsonl_dir = Path.home() / ".claude" / "projects" / resolved.replace("/", "-")
+        if not jsonl_dir.is_dir():
+            return None
+
+        # Newest .jsonl with mtime >= cutoff wins (no size cap).
+        best_file, best_mtime = None, 0.0
+        for candidate in jsonl_dir.glob("*.jsonl"):
+            try:
+                mtime = candidate.stat().st_mtime
+            except OSError:
+                continue  # vanished or unreadable; try the next candidate
+            if mtime >= cutoff and mtime > best_mtime:
+                best_file, best_mtime = candidate, mtime
+        if best_file is None:
+            return None
+
+        stats = SessionStats()
+        # Decode explicitly: a locale-dependent default could fail mid-iteration.
+        with open(best_file, "r", encoding="utf-8", errors="replace") as fh:
+            for line in fh:
+                try:
+                    obj = json.loads(line)
+                except ValueError:
+                    continue
+                # Valid JSON need not be an object (`null`, `[]`, `42`, ...).
+                message = obj.get("message") if isinstance(obj, dict) else None
+                if not isinstance(message, dict) or obj.get("type") != "assistant":
+                    continue
+                usage = message.get("usage")
+                usage = usage if isinstance(usage, dict) else {}
+                stats.input_tokens += usage.get("input_tokens") or 0
+                stats.output_tokens += usage.get("output_tokens") or 0
+                stats.cache_creation_input_tokens += usage.get("cache_creation_input_tokens") or 0
+                stats.cache_read_input_tokens += usage.get("cache_read_input_tokens") or 0
+                stats.api_turns += 1
+        return stats if stats.api_turns else None
+
+    def _format_stats_line(
+        self, execution_time: float, stats: Optional[SessionStats]
+    ) -> str:
+        """Build the indented, pipe-separated summary line shown on the console."""
+        line = f"    Duration: {self._format_duration(execution_time)}"
+        if stats is None:
+            return line  # no token data available: duration only
+        tokens_in = f"Input: {stats.total_input_tokens:,} tokens"
+        return f"{line} | {tokens_in} | Output: {stats.output_tokens:,} tokens"
+
+    def _log_session_stats(
+        self, prompt: QueuedPrompt, stats: Optional[SessionStats]
+    ) -> None:
+        """Append a one-line token breakdown to the prompt's log; no-op without stats."""
+        if stats is not None:
+            turn_word = "turn" if stats.api_turns == 1 else "turns"
+            breakdown = " + ".join([
+                f"{stats.input_tokens:,} input",
+                f"{stats.cache_creation_input_tokens:,} cache-write",
+                f"{stats.cache_read_input_tokens:,} cache-read",
+            ])
+            totals = f"{stats.total_input_tokens:,} total input, {stats.output_tokens:,} output"
+            turns = f"({stats.api_turns} API {turn_word})"
+            prompt.add_log(f"Token usage: {breakdown} = {totals} {turns}")
+
     def add_prompt(self, prompt: QueuedPrompt) -> bool:
         """Add a prompt to the queue."""
         try:
diff --git a/tests/test_session_stats.py b/tests/test_session_stats.py
new file mode 100644
index 0000000..54a78a9
--- /dev/null
+++ b/tests/test_session_stats.py
@@ -0,0 +1,563 @@
+"""
+Tests for SessionStats dataclass and session stats extraction from JSONL logs.
+
+Test IDs use the SS- prefix for cross-reference.
+"""
+
+import json
+import os
+from datetime import datetime, timedelta
+from pathlib import Path
+from unittest.mock import patch
+
+from claude_code_queue.models import (
+    SessionStats,
+    QueuedPrompt,
+    PromptStatus,
+    ExecutionResult,
+    RateLimitInfo,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_assistant_line(
+    input_tokens=10,
+    output_tokens=20,
+    cache_creation=100,
+    cache_read=200,
+):
+    """Return one JSONL "assistant" record (as a JSON string) carrying the given usage."""
+    usage = {
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "cache_creation_input_tokens": cache_creation,
+        "cache_read_input_tokens": cache_read,
+    }
+    message = {
+        "role": "assistant",
+        "content": [{"type": "text", "text": "hello"}],
+        "usage": usage,
+    }
+    # Top-level key order is kept as: type, then message.
+    return json.dumps({"type": "assistant", "message": message})
+
+
+def _make_user_line():
+    """Return a JSONL "user" record; stats extraction is expected to skip it."""
+    record = {"type": "user"}
+    record["message"] = {"role": "user", "content": "say hello"}
+    # Serialised on a single line, as JSONL requires.
+    return json.dumps(record)
+
+
+def _make_queue_op_line():
+    """Return a JSONL "queue-operation" record; stats extraction should skip it."""
+    record = {"type": "queue-operation"}
+    record.update(
+        operation="enqueue", timestamp="2026-03-15T12:00:00.000Z",
+    )
+    return json.dumps(record)
+
+
+def _make_last_prompt_line():
+    """Return a JSONL "last-prompt" record; stats extraction should skip it."""
+    record = {}
+    record["type"] = "last-prompt"
+    record["lastPrompt"] = "say hello"
+    return json.dumps(record)
+
+
+def _write_jsonl(path, lines):
+    """Write each entry newline-terminated to *path* (parents created); return *path*."""
+    target_dir = path.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    with open(path, "w") as out:
+        out.writelines(entry + "\n" for entry in lines)
+    return path
+
+
+def _setup_jsonl_for_prompt(tmp_path, prompt, lines):
+    """Write *lines* to the session log location derived from *prompt*.
+
+    The file is tmp_path/.claude/projects/<working dir with "/" -> "-">/
+    session-uuid.jsonl; its path is returned.
+    """
+    work_dir = prompt._resolved_working_directory
+    if not work_dir:
+        work_dir = str(Path(prompt.working_directory).resolve())
+    project_dir = tmp_path / ".claude" / "projects" / work_dir.replace("/", "-")
+    session_log = project_dir / "session-uuid.jsonl"
+    _write_jsonl(session_log, lines)
+    return session_log
+
+
+def _make_stats_prompt(tmp_path):
+    """Return a QueuedPrompt rooted at tmp_path/workdir that "ran" five seconds ago."""
+    sandbox = tmp_path / "workdir"
+    sandbox.mkdir(exist_ok=True)
+    sandbox_str = str(sandbox)
+
+    queued = QueuedPrompt(id="abc12345", content="test", working_directory=sandbox_str)
+    # last_executed lies slightly in the past so freshly written logs have mtime >= it.
+    queued.last_executed = datetime.now() - timedelta(seconds=5)
+    # Stored un-resolved on purpose: this is the exact string the log path is built from.
+    queued._resolved_working_directory = sandbox_str
+    return queued
+
+
+# ===========================================================================
+# SessionStats — basic properties
+# ===========================================================================
+
+
+def test_session_stats_defaults_are_zero():  # SS-001
+    """Every counter of a freshly constructed SessionStats is zero."""
+    fresh = SessionStats()
+    counters = ("input_tokens", "output_tokens", "cache_creation_input_tokens",
+                "cache_read_input_tokens", "api_turns")
+    for name in counters:
+        assert getattr(fresh, name) == 0
+
+
+def test_session_stats_total_input_sums_all_three():  # SS-002
+    """total_input_tokens combines the three input-side counters (10 + 100 + 200)."""
+    parts = {
+        "input_tokens": 10, "cache_creation_input_tokens": 100, "cache_read_input_tokens": 200,
+    }
+    combined = SessionStats(**parts).total_input_tokens
+    assert combined == 310
+
+
+def test_session_stats_total_input_zero_when_all_zero():  # SS-003
+    """With nothing recorded, the combined input total is zero as well."""
+    assert SessionStats().total_input_tokens == 0
+
+
+# ===========================================================================
+# _extract_session_stats()
+# ===========================================================================
+
+
+def test_extract_stats_single_turn(manager, tmp_path, mocker):  # SS-010
+    """One user/assistant exchange yields exactly that turn's usage."""
+    home_attr = "claude_code_queue.queue_manager.Path.home"
+    mocker.patch(home_attr, return_value=tmp_path)
+    prompt = _make_stats_prompt(tmp_path)
+    turn = _make_assistant_line(input_tokens=5, output_tokens=50, cache_creation=1000, cache_read=2000)
+    log_path = _setup_jsonl_for_prompt(tmp_path, prompt, [_make_user_line(), turn])
+    os.utime(log_path)  # times omitted: stamp the log with the current time
+
+    got = manager._extract_session_stats(prompt)
+
+    assert got is not None
+    assert got.api_turns == 1
+    assert got.input_tokens == 5
+    assert got.output_tokens == 50
+    assert got.cache_creation_input_tokens == 1000
+    assert got.cache_read_input_tokens == 2000
+    assert got.total_input_tokens == 3005
+
+
+def test_extract_stats_multi_turn(manager, tmp_path, mocker):  # SS-011
+    """Usage is accumulated across all three assistant turns."""
+    mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path)
+    prompt = _make_stats_prompt(tmp_path)
+    turns = [
+        _make_assistant_line(input_tokens=3, output_tokens=100, cache_creation=5000, cache_read=8000),
+        _make_assistant_line(input_tokens=1, output_tokens=200, cache_creation=5000, cache_read=8000),
+        _make_assistant_line(input_tokens=1, output_tokens=150, cache_creation=0, cache_read=10000),
+    ]
+    # Interleave a user line before every assistant turn.
+    entries = [line for turn in turns for line in (_make_user_line(), turn)]
+    log_path = _setup_jsonl_for_prompt(tmp_path, prompt, entries)
+    os.utime(log_path)  # times omitted: stamp the log with the current time
+
+    got = manager._extract_session_stats(prompt)
+
+    assert got is not None
+    assert (got.input_tokens, got.output_tokens) == (5, 450)
+    assert got.cache_creation_input_tokens == 10000
+    assert got.cache_read_input_tokens == 26000
+    assert got.total_input_tokens == 36005
+    assert got.api_turns == 3
+
+
+def test_extract_stats_non_assistant_lines_ignored(manager, tmp_path, mocker):  # SS-012
+    """Queue-operation, user and last-prompt records do not affect the totals."""
+    home_attr = "claude_code_queue.queue_manager.Path.home"
+    mocker.patch(home_attr, return_value=tmp_path)
+    prompt = _make_stats_prompt(tmp_path)
+    noise_before = [_make_queue_op_line(), _make_user_line()]
+    turn = _make_assistant_line(input_tokens=3, output_tokens=10, cache_creation=100, cache_read=200)
+    entries = noise_before + [turn, _make_last_prompt_line()]
+    log_path = _setup_jsonl_for_prompt(tmp_path, prompt, entries)
+    os.utime(log_path)  # times omitted: stamp the log with the current time
+
+    got = manager._extract_session_stats(prompt)
+
+    assert got is not None
+    assert got.api_turns == 1
+    assert got.input_tokens == 3
+    assert got.output_tokens == 10
+
+
+def test_extract_stats_missing_usage_block(manager, tmp_path, mocker):  # SS-013
+    """An assistant record lacking message.usage is still a turn, contributing 0 tokens."""
+    home_attr = "claude_code_queue.queue_manager.Path.home"
+    mocker.patch(home_attr, return_value=tmp_path)
+    prompt = _make_stats_prompt(tmp_path)
+
+    # Hand-built assistant record whose message carries no "usage" key.
+    bare_message = {
+        "role": "assistant",
+        "content": [{"type": "text", "text": "hi"}],
+    }
+    without_usage = json.dumps({"type": "assistant", "message": bare_message})
+    with_usage = _make_assistant_line(input_tokens=5, output_tokens=10, cache_creation=100, cache_read=200)
+    log_path = _setup_jsonl_for_prompt(tmp_path, prompt, [without_usage, with_usage])
+    os.utime(log_path)  # times omitted: stamp the log with the current time
+
+    got = manager._extract_session_stats(prompt)
+
+    assert got is not None
+    # Both records are turns, but only the second one carries tokens.
+    assert got.api_turns == 2
+    assert got.input_tokens == 5
+    assert got.output_tokens == 10
+
+
+def test_extract_stats_malformed_line_skipped(manager, tmp_path, mocker):  # SS-014
+    """Lines that are not valid JSON are skipped; the valid record is still counted."""
+    home_attr = "claude_code_queue.queue_manager.Path.home"
+    mocker.patch(home_attr, return_value=tmp_path)
+    prompt = _make_stats_prompt(tmp_path)
+    valid = _make_assistant_line(input_tokens=7, output_tokens=30, cache_creation=500, cache_read=600)
+    # Garbage both before and after the one parsable record.
+    entries = ["this is not json", valid, "{bad json"]
+    log_path = _setup_jsonl_for_prompt(tmp_path, prompt, entries)
+    os.utime(log_path)  # times omitted: stamp the log with the current time
+
+    got = manager._extract_session_stats(prompt)
+
+    assert got is not None
+    assert got.api_turns == 1
+    assert got.input_tokens == 7
+    assert got.output_tokens == 30
+
+
+def test_extract_stats_old_mtime_returns_none(manager, tmp_path, mocker):  # SS-015
+    """A log last modified before the cutoff is not used: the result is None."""
+    mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path)
+    prompt = _make_stats_prompt(tmp_path)
+    log_path = _setup_jsonl_for_prompt(tmp_path, prompt, [_make_assistant_line()])
+
+    # Backdate the log to an hour before the prompt's last execution.
+    stale = (prompt.last_executed - timedelta(hours=1)).timestamp()
+    os.utime(log_path, (stale, stale))
+
+    got = manager._extract_session_stats(prompt)
+    assert got is None
+
+
+def test_extract_stats_empty_file(manager, tmp_path, mocker):  # SS-016
+    """An empty JSONL log has zero API turns, which is reported as None."""
+    mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path)
+    prompt = _make_stats_prompt(tmp_path)
+    empty_log = _setup_jsonl_for_prompt(tmp_path, prompt, [])
+    os.utime(empty_log)  # times omitted: stamp the log with the current time
+
+    # The file is fresh enough to be picked; it simply contains no turns.
+    assert manager._extract_session_stats(prompt) is None
+
+
+def test_extract_stats_directory_missing(manager, tmp_path, mocker):  # SS-017
+    """Without a ~/.claude/projects/<encoded>/ directory the result is None."""
+    home_attr = "claude_code_queue.queue_manager.Path.home"
+    mocker.patch(home_attr, return_value=tmp_path)
+    prompt = _make_stats_prompt(tmp_path)
+    # Nothing is created below tmp_path/.claude in this test.
+    assert manager._extract_session_stats(prompt) is None
+
+
+def test_extract_stats_resolved_dir_none_fallback(manager, tmp_path, mocker):  # SS-018
+    """With _resolved_working_directory unset, working_directory is resolved instead."""
+    home_attr = "claude_code_queue.queue_manager.Path.home"
+    mocker.patch(home_attr, return_value=tmp_path)
+    prompt = _make_stats_prompt(tmp_path)
+    prompt._resolved_working_directory = None
+
+    # Lay the log out by hand under the freshly resolved working directory.
+    project_name = str(Path(prompt.working_directory).resolve()).replace("/", "-")
+    log_path = tmp_path / ".claude" / "projects" / project_name / "session.jsonl"
+    turn = _make_assistant_line(input_tokens=1, output_tokens=2, cache_creation=3, cache_read=4)
+    _write_jsonl(log_path, [turn])
+    os.utime(log_path)  # times omitted: stamp the log with the current time
+
+    got = manager._extract_session_stats(prompt)
+
+    assert got is not None
+    assert got.total_input_tokens == 8
+
+
+def test_extract_stats_last_executed_none(manager, tmp_path):  # SS-019
+    """A prompt whose last_executed is None yields None."""
+    never_run = _make_stats_prompt(tmp_path)
+    never_run.last_executed = None
+
+    # Path.home is deliberately left unpatched in this test.
+    assert manager._extract_session_stats(never_run) is None
+
+
+def test_extract_stats_newest_file_selected(manager, tmp_path, mocker):  # SS-020
+    """Of several sufficiently fresh JSONL files, the most recently modified is read."""
+    home_attr = "claude_code_queue.queue_manager.Path.home"
+    mocker.patch(home_attr, return_value=tmp_path)
+    prompt = _make_stats_prompt(tmp_path)
+    project_name = prompt._resolved_working_directory.replace("/", "-")
+    project_dir = tmp_path / ".claude" / "projects" / project_name
+    project_dir.mkdir(parents=True, exist_ok=True)
+
+    # (file name, mtime offset from "now" in seconds, usage kwargs); older file first.
+    sessions = [
+        ("old-session.jsonl", -2,
+         dict(input_tokens=999, output_tokens=999, cache_creation=0, cache_read=0)),
+        ("new-session.jsonl", 0,
+         dict(input_tokens=1, output_tokens=2, cache_creation=3, cache_read=4)),
+    ]
+    for file_name, offset, usage in sessions:
+        log_path = project_dir / file_name
+        _write_jsonl(log_path, [_make_assistant_line(**usage)])
+        stamp = datetime.now().timestamp() + offset
+        os.utime(log_path, (stamp, stamp))
+
+    got = manager._extract_session_stats(prompt)
+
+    assert got is not None
+    # Only the newer file's numbers may show up; 999 would mean the old one was read.
+    assert got.input_tokens == 1
+    assert got.output_tokens == 2
+
+
+def test_extract_stats_exception_returns_none(manager, tmp_path):  # SS-021
+    """An error raised by the inner implementation is caught; None comes back."""
+    prompt = _make_stats_prompt(tmp_path)
+    failing = patch.object(manager, "_do_extract_session_stats", side_effect=OSError("boom"))
+    with failing:
+        assert manager._extract_session_stats(prompt) is None
+
+
+# ===========================================================================
+# _format_stats_line()
+# ===========================================================================
+
+
+def test_format_stats_line_with_stats(manager):  # SS-030
+    """With stats, the line shows duration, combined input and output tokens."""
+    usage = SessionStats(
+        input_tokens=100, output_tokens=500, api_turns=3,
+        cache_creation_input_tokens=10000, cache_read_input_tokens=5000,
+    )
+
+    rendered = manager._format_stats_line(154.0, usage)
+
+    assert rendered.startswith("    ")
+    # Input is the combined total: 100 + 10,000 + 5,000.
+    for fragment in ("Duration: 2m", "Input: 15,100 tokens", "Output: 500 tokens"):
+        assert fragment in rendered
+
+
+def test_format_stats_line_without_stats(manager):  # SS-031
+    """Without stats, only the indented duration is rendered."""
+    rendered = manager._format_stats_line(45.0, None)
+    assert rendered.startswith("    ")
+    assert "Duration: 45s" in rendered
+    assert not any(label in rendered for label in ("Input", "Output"))
+
+
+def test_format_stats_line_pipe_separators(manager):  # SS-032
+    """The individual fields are separated by " | "."""
+    rendered = manager._format_stats_line(10.0, SessionStats(input_tokens=1, output_tokens=2))
+    assert " | " in rendered
+
+
+# ===========================================================================
+# _log_session_stats()
+# ===========================================================================
+
+
+def test_log_session_stats_detailed_breakdown(manager):  # SS-050
+    """The log entry lists every token component, the total and the turn count."""
+    prompt = QueuedPrompt(id="abc12345", content="test")
+    usage = SessionStats(
+        input_tokens=402, output_tokens=51568, api_turns=297,
+        cache_creation_input_tokens=19093602, cache_read_input_tokens=4255901,
+    )
+
+    manager._log_session_stats(prompt, usage)
+
+    # Every component of the breakdown must appear in the execution log.
+    expected_fragments = (
+        "402 input", "19,093,602 cache-write", "4,255,901 cache-read",
+        "23,349,905 total input", "51,568 output", "297 API turns",
+    )
+    logged = prompt.execution_log
+    for fragment in expected_fragments:
+        assert fragment in logged
+
+
+def test_log_session_stats_none_no_log(manager):  # SS-051
+    untouched = QueuedPrompt(id="abc12345", content="test")
+    manager._log_session_stats(untouched, None)  # None: no token entry expected
+    assert "Token usage" not in untouched.execution_log
+
+
+def test_log_session_stats_single_turn_singular(manager):  # SS-052
+    """A single API turn is reported in the singular ("turn", not "turns")."""
+    prompt = QueuedPrompt(id="abc12345", content="test")
+    manager._log_session_stats(prompt, SessionStats(input_tokens=1, output_tokens=2, api_turns=1))
+    assert "1 API turn)" in prompt.execution_log
+
+
+# ===========================================================================
+# Integration: stats printed in _process_execution_result()
+# ===========================================================================
+
+
+def test_result_success_prints_stats(manager, tmp_path, mocker, capsys):  # SS-040
+    """A successful result prints the success message followed by duration and tokens."""
+    home_attr = "claude_code_queue.queue_manager.Path.home"
+    mocker.patch(home_attr, return_value=tmp_path)
+    manager.state = manager.storage.load_queue_state()
+    prompt = _make_stats_prompt(tmp_path)
+    prompt.status = PromptStatus.EXECUTING
+    manager.state.add_prompt(prompt)
+    turn = _make_assistant_line(input_tokens=5, output_tokens=50, cache_creation=1000, cache_read=2000)
+    _setup_jsonl_for_prompt(tmp_path, prompt, [turn])
+    outcome = ExecutionResult(success=True, output="done", execution_time=120.5)
+
+    manager._process_execution_result(prompt, outcome)
+
+    printed = capsys.readouterr().out
+    expected = ("completed successfully", "Duration:", "Input: 3,005 tokens", "Output: 50 tokens")
+    for fragment in expected:
+        assert fragment in printed
+
+
+def test_result_success_no_jsonl_prints_duration_only(manager, tmp_path, mocker, capsys):  # SS-041
+    """Without a JSONL log the console shows the duration but no token figures."""
+    mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path)
+    manager.state = manager.storage.load_queue_state()
+    prompt = _make_stats_prompt(tmp_path)
+    prompt.status = PromptStatus.EXECUTING
+    manager.state.add_prompt(prompt)
+
+    # No session log is written for this prompt.
+    outcome = ExecutionResult(success=True, output="done", execution_time=30.0)
+    manager._process_execution_result(prompt, outcome)
+    printed = capsys.readouterr().out
+    assert "Duration: 30s" in printed and "Input" not in printed
+
+
+def test_result_rate_limited_prints_stats_before_cleanup(manager, tmp_path, mocker, capsys):  # SS-042
+    """Stats must be extracted BEFORE cleanup deletes the JSONL."""
+    home_attr = "claude_code_queue.queue_manager.Path.home"
+    mocker.patch(home_attr, return_value=tmp_path)
+    manager.state = manager.storage.load_queue_state()
+    prompt = _make_stats_prompt(tmp_path)
+    prompt.status = PromptStatus.EXECUTING
+    manager.state.add_prompt(prompt)
+    turn = _make_assistant_line(input_tokens=3, output_tokens=10, cache_creation=500, cache_read=600)
+    _setup_jsonl_for_prompt(tmp_path, prompt, [turn])
+
+    outcome = ExecutionResult(
+        success=False,
+        output="",
+        error="rate limited",
+        execution_time=5.0,
+        rate_limit_info=RateLimitInfo(
+            is_rate_limited=True,
+            limit_message="usage limit reached",
+        ),
+    )
+
+    manager._process_execution_result(prompt, outcome)
+
+    printed = capsys.readouterr().out
+    assert "rate limited" in printed
+    # 3 + 500 + 600 input-side tokens.
+    assert "Input: 1,103 tokens" in printed
+    assert "Output: 10 tokens" in printed
+
+
+def test_result_generic_failure_retry_prints_stats(manager, tmp_path, mocker, capsys):  # SS-043
+    """A generic failure still prints the token figures."""
+    mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path)
+    manager.state = manager.storage.load_queue_state()
+    prompt = _make_stats_prompt(tmp_path)
+    prompt.status = PromptStatus.EXECUTING
+    manager.state.add_prompt(prompt)
+    turn = _make_assistant_line(input_tokens=2, output_tokens=30, cache_creation=100, cache_read=200)
+    _setup_jsonl_for_prompt(tmp_path, prompt, [turn])
+    outcome = ExecutionResult(
+        error="something broke", success=False, output="", execution_time=10.0
+    )
+
+    manager._process_execution_result(prompt, outcome)
+
+    printed = capsys.readouterr().out
+    assert "failed" in printed
+    assert "Input: 302 tokens" in printed
+    assert "Output: 30 tokens" in printed
+
+
+def test_result_generic_failure_permanent_prints_stats(manager, tmp_path, mocker, capsys):  # SS-044
+    """A permanent failure still prints the token figures."""
+    mocker.patch("claude_code_queue.queue_manager.Path.home", return_value=tmp_path)
+    manager.state = manager.storage.load_queue_state()
+    prompt = _make_stats_prompt(tmp_path)
+    prompt.status = PromptStatus.EXECUTING
+    # One retry allowed and one already counted.
+    prompt.max_retries = prompt.retry_count = 1
+    manager.state.add_prompt(prompt)
+    turn = _make_assistant_line(input_tokens=1, output_tokens=5, cache_creation=50, cache_read=100)
+    _setup_jsonl_for_prompt(tmp_path, prompt, [turn])
+    outcome = ExecutionResult(
+        error="something broke", success=False, output="", execution_time=8.0
+    )
+
+    manager._process_execution_result(prompt, outcome)
+
+    printed = capsys.readouterr().out
+    assert "failed permanently" in printed
+    assert "Input: 151 tokens" in printed
+    assert "Output: 5 tokens" in printed
+
+
+def test_result_non_retryable_no_stats_printed(manager, tmp_path, mocker, capsys):  # SS-045
+    """Non-retryable errors should not print stats."""
+    home_attr = "claude_code_queue.queue_manager.Path.home"
+    mocker.patch(home_attr, return_value=tmp_path)
+    manager.state = manager.storage.load_queue_state()
+    prompt = _make_stats_prompt(tmp_path)
+    prompt.status = PromptStatus.EXECUTING
+    manager.state.add_prompt(prompt)
+    # A log with usage exists; the stats line must still be absent.
+    turn = _make_assistant_line(input_tokens=1, output_tokens=1, cache_creation=1, cache_read=1)
+    _setup_jsonl_for_prompt(tmp_path, prompt, [turn])
+    outcome = ExecutionResult(
+        success=False,
+        output="",
+        error="nested session",
+        is_non_retryable=True,
+        execution_time=1.0,
+    )
+
+    manager._process_execution_result(prompt, outcome)
+
+    printed = capsys.readouterr().out
+    assert "non-retryable" in printed
+    assert not any(label in printed for label in ("Input", "Duration"))

From 9877331747f8e76248b59449acf1b8cefd9669ec Mon Sep 17 00:00:00 2001
From: Matthew Tibbits <mtibbits@users.noreply.github.com>
Date: Mon, 16 Mar 2026 02:44:14 +0000
Subject: [PATCH 3/6] feat: add `cleanup` CLI subcommand for backlog artifact
 purge

Adds `claude-queue cleanup [--dry-run]` to remove rate-limit artifacts
from ~/.claude/. Identifies rate-limited sessions by scanning debug
transcripts for 'rate_limit_error', then deletes correlated JSONL,
todo, and telemetry files by UUID.

No claude binary needed (E3 pattern).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Matthew Tibbits <mtibbits@users.noreply.github.com>
---
 CLAUDE.md                    |   1 +
 src/claude_code_queue/cli.py | 100 +++++++++++++++++++++++++++++++
 tests/test_cli.py            | 112 +++++++++++++++++++++++++++++++++++
 3 files changed, 213 insertions(+)

diff --git a/CLAUDE.md b/CLAUDE.md
index c154483..76dba7c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -181,5 +181,6 @@ retry_not_before: null
 | `test` | Verify claude CLI | Yes |
 | `bank save/list/use/delete` | Template bank ops | No |
 | `batch generate/validate/variables` | Batch job generation | No |
+| `cleanup [--dry-run]` | Remove rate-limit artifacts from ~/.claude/ | No |
 | `install-skill [--force]` | Copy SKILL.md to ~/.claude/skills/ | No |
 | `prompt-box` | Launch Rust TUI | No (needs Rust binary) |
diff --git a/src/claude_code_queue/cli.py b/src/claude_code_queue/cli.py
index 489fe42..3420b91 100644
--- a/src/claude_code_queue/cli.py
+++ b/src/claude_code_queue/cli.py
@@ -12,6 +12,7 @@
 import sys
 from datetime import datetime
 from pathlib import Path
+from typing import List
 
 from .batch import (
     extract_variables,
@@ -241,6 +242,15 @@ def main():
         "--force", action="store_true", help="Overwrite existing skill file"
     )
 
+    # Cleanup subcommand
+    cleanup_parser = subparsers.add_parser(
+        "cleanup", help="Remove rate-limit artifacts from ~/.claude/"
+    )
+    cleanup_parser.add_argument(
+        "--dry-run", action="store_true",
+        help="Report what would be deleted without acting",
+    )
+
     # Prompt box subcommand
     prompt_box_parser = subparsers.add_parser(
         "prompt-box", help="Launch the interactive prompt box CLI", add_help=False
@@ -279,6 +289,8 @@ def main():
             return cmd_batch(args)
         elif args.command == "install-skill":
             return cmd_install_skill(args)
+        elif args.command == "cleanup":
+            return cmd_cleanup(args)
         elif args.command == "prompt-box":
             return cmd_prompt_box(args)
         else:
@@ -721,6 +733,94 @@ def cmd_install_skill(args) -> int:
     return 0
 
 
+def cmd_cleanup(args) -> int:
+    """Remove rate-limit artifacts from ~/.claude/.
+
+    Sessions are identified by scanning debug transcripts (debug/*.txt) for the
+    literal 'rate_limit_error'; each matching transcript's stem is used as the
+    session UUID.  The transcript plus the correlated JSONL log, <=2-byte todo
+    stub and telemetry files are deleted, or only listed when args.dry_run is set.
+
+    A session that has no todo file is normal and is not reported as an error.
+    Always returns 0.  This is the E3 pattern: no claude binary needed.
+    """
+    from glob import escape as glob_escape
+
+    claude_dir = Path.home() / ".claude"
+    dry_run = args.dry_run
+    matched = skipped = 0
+    rate_limited_uuids: List[str] = []
+
+    def purge(path: Path) -> None:
+        """Delete one artifact (or announce it on --dry-run) and update the tallies."""
+        nonlocal matched, skipped
+        try:
+            if dry_run:
+                print(f"  [dry-run] would delete {path}")
+            else:
+                path.unlink()
+            matched += 1
+        except OSError:
+            skipped += 1
+
+    # 1. Debug transcripts — primary identification via content grep.  The full file
+    #    is read (max ~90 KB for successful runs): correctness beats speed here.
+    debug_dir = claude_dir / "debug"
+    if debug_dir.is_dir():
+        for debug_file in debug_dir.glob("*.txt"):
+            try:
+                with open(debug_file, "r", encoding="utf-8", errors="replace") as fh:
+                    is_rate_limited = "rate_limit_error" in fh.read()
+            except OSError:
+                skipped += 1
+                continue
+            if is_rate_limited:
+                rate_limited_uuids.append(debug_file.stem)
+                purge(debug_file)
+
+    if rate_limited_uuids:
+        print(f"Identified {len(rate_limited_uuids)} rate-limited session(s)")
+
+    # Stems are interpolated into glob patterns below; escape them so a transcript
+    # named e.g. "*.txt" cannot match (and delete) files of unrelated sessions.
+    safe_uuids = [glob_escape(session_uuid) for session_uuid in rate_limited_uuids]
+
+    # 2. JSONL conversation logs — by UUID correlation
+    projects_dir = claude_dir / "projects"
+    if projects_dir.is_dir():
+        for safe_uuid in safe_uuids:
+            for jsonl_file in projects_dir.glob(f"*/{safe_uuid}.jsonl"):
+                purge(jsonl_file)
+
+    # 3. Todo stubs — by UUID correlation + 2-byte size guard
+    todos_dir = claude_dir / "todos"
+    if todos_dir.is_dir():
+        for session_uuid in rate_limited_uuids:
+            todo_file = todos_dir / f"{session_uuid}-agent-{session_uuid}.json"
+            try:
+                is_stub = todo_file.stat().st_size <= 2
+            except FileNotFoundError:
+                continue  # no todo file for this session: nothing to do, not an error
+            except OSError:
+                skipped += 1
+                continue
+            if is_stub:
+                purge(todo_file)
+
+    # 4. Telemetry — by UUID correlation
+    telemetry_dir = claude_dir / "telemetry"
+    if telemetry_dir.is_dir():
+        for safe_uuid in safe_uuids:
+            for event_file in telemetry_dir.glob(f"1p_failed_events.{safe_uuid}.*.json"):
+                purge(event_file)
+
+    action = "Would delete" if dry_run else "Deleted"
+    print(f"{action} {matched} rate-limit artifact(s)")
+    if skipped:
+        print(f"Skipped {skipped} file(s) due to errors")
+    return 0
+
+
 def cmd_prompt_box(args) -> int:
     """Launch the interactive prompt box CLI."""
     try:
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 5a6cd06..8a5c941 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1202,3 +1202,115 @@ def test_batch_variables_template_not_found(self, tmp_path, capsys):
     def test_batch_variables_returns_0(self, tmp_path):
         code = self._run(tmp_path, "---\npriority: 0\n---\n\nProcess {{item}}")
         assert code == 0
+
+
+# ===========================================================================
+# Cleanup Command
+# ===========================================================================
+
+class TestCleanup:
+    """Tests for `claude-queue cleanup [--dry-run]`."""
+
+    def _make_artifacts(self, tmp_path, session_uuid="aaa-bbb-ccc"):
+        """Create fake rate-limit artifacts under tmp_path/.claude/."""
+        claude_dir = tmp_path / ".claude"
+        debug_dir = claude_dir / "debug"
+        projects_dir = claude_dir / "projects" / "-home-testuser-project"
+        todos_dir = claude_dir / "todos"
+        telemetry_dir = claude_dir / "telemetry"
+        for d in (debug_dir, projects_dir, todos_dir, telemetry_dir):
+            d.mkdir(parents=True)
+
+        # Debug file with rate_limit_error content
+        debug_file = debug_dir / f"{session_uuid}.txt"
+        debug_file.write_text("startup\nrate_limit_error\n")
+
+        # Correlated JSONL
+        jsonl_file = projects_dir / f"{session_uuid}.jsonl"
+        jsonl_file.write_bytes(b"x" * 5000)
+
+        # Correlated todo stub
+        todo_file = todos_dir / f"{session_uuid}-agent-{session_uuid}.json"
+        todo_file.write_text("[]")
+
+        # Correlated telemetry file
+        telemetry_file = telemetry_dir / f"1p_failed_events.{session_uuid}.other-uuid.json"
+        telemetry_file.write_text('{"events": []}')
+
+        return debug_file, jsonl_file, todo_file, telemetry_file
+
+    def test_cleanup_dry_run_does_not_delete(self, tmp_path, capsys):
+        debug_file, jsonl_file, todo_file, telemetry_file = self._make_artifacts(tmp_path)
+
+        with patch("sys.argv", ["claude-queue", "cleanup", "--dry-run"]):
+            with patch("pathlib.Path.home", return_value=tmp_path):
+                code = main()
+
+        assert code == 0
+        assert debug_file.exists(), "dry-run must not delete files"
+        assert jsonl_file.exists()
+        assert todo_file.exists()
+        assert telemetry_file.exists()
+        out = capsys.readouterr().out
+        assert "Would delete" in out
+        assert "dry-run" in out
+
+    def test_cleanup_deletes_artifacts(self, tmp_path, capsys):
+        debug_file, jsonl_file, todo_file, telemetry_file = self._make_artifacts(tmp_path)
+
+        with patch("sys.argv", ["claude-queue", "cleanup"]):
+            with patch("pathlib.Path.home", return_value=tmp_path):
+                code = main()
+
+        assert code == 0
+        assert not debug_file.exists()
+        assert not jsonl_file.exists()
+        assert not todo_file.exists()
+        assert not telemetry_file.exists()
+        out = capsys.readouterr().out
+        assert "Deleted 4 rate-limit artifact(s)" in out
+
+    def test_cleanup_preserves_non_rate_limited_debug(self, tmp_path, capsys):
+        """Debug files without rate_limit_error are not deleted."""
+        claude_dir = tmp_path / ".claude"
+        debug_dir = claude_dir / "debug"
+        debug_dir.mkdir(parents=True)
+
+        good_file = debug_dir / "good-session.txt"
+        good_file.write_text("startup\nall good\nstream completed\n")
+
+        with patch("sys.argv", ["claude-queue", "cleanup"]):
+            with patch("pathlib.Path.home", return_value=tmp_path):
+                code = main()
+
+        assert code == 0
+        assert good_file.exists()
+        assert "Deleted 0" in capsys.readouterr().out
+
+    def test_cleanup_preserves_real_todo_file(self, tmp_path, capsys):
+        """Todo files > 2 bytes are preserved even if UUID matches a rate-limited session."""
+        debug_file, jsonl_file, todo_file, telemetry_file = self._make_artifacts(tmp_path)
+        # Overwrite the stub with realistic todo content (> 2 bytes)
+        todo_file.write_text('[{"task": "implement feature", "status": "in_progress"}]')
+
+        with patch("sys.argv", ["claude-queue", "cleanup"]):
+            with patch("pathlib.Path.home", return_value=tmp_path):
+                code = main()
+
+        assert code == 0
+        assert todo_file.exists(), "real todo file (> 2 bytes) must be preserved"
+        assert not debug_file.exists()
+        assert not jsonl_file.exists()
+        # 3 deleted: debug + jsonl + telemetry (todo preserved by size guard)
+        assert "Deleted 3" in capsys.readouterr().out
+
+    def test_cleanup_handles_empty_claude_dir(self, tmp_path, capsys):
+        """Cleanup succeeds when ~/.claude/ has no artifact directories."""
+        (tmp_path / ".claude").mkdir()
+
+        with patch("sys.argv", ["claude-queue", "cleanup"]):
+            with patch("pathlib.Path.home", return_value=tmp_path):
+                code = main()
+
+        assert code == 0
+        assert "Deleted 0" in capsys.readouterr().out

From 53e0fa4599c45293fb5718bf5b1ad1d6c99ed4ed Mon Sep 17 00:00:00 2001
From: Matthew Tibbits <mtibbits@users.noreply.github.com>
Date: Mon, 16 Mar 2026 03:14:58 +0000
Subject: [PATCH 4/6] feat: add per-prompt `model` field for model selection

Allow users to specify a Claude model ID (e.g. claude-haiku-4-5-20251001)
per queued prompt via YAML frontmatter or `claude-queue add --model`.
When set, the value is passed to the claude CLI via `--model <id>`.

- models.py: add `model: Optional[str]` field to QueuedPrompt
- storage.py: R7 type-safe coercion in parse, round-trip in write,
  include in templates and bank list output
- claude_interface.py: inject `--model` flag before positional prompt
- cli.py: add `--model/-m` to `add` subparser, display in `bank list`
- CLAUDE.md: add model field to YAML schema, --model to CLI reference
- 19 new tests across all 4 test files

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Matthew Tibbits <mtibbits@users.noreply.github.com>
---
 CLAUDE.md                                 |   3 +-
 src/claude_code_queue/claude_interface.py |   3 +
 src/claude_code_queue/cli.py              |   6 ++
 src/claude_code_queue/models.py           |   1 +
 src/claude_code_queue/storage.py          |  11 +++
 tests/test_claude_interface.py            |  36 ++++++++
 tests/test_cli.py                         |  25 ++++++
 tests/test_models.py                      |  17 ++++
 tests/test_storage.py                     | 103 ++++++++++++++++++++++
 9 files changed, 204 insertions(+), 1 deletion(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index c154483..b06fc1f 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -157,6 +157,7 @@ working_directory: .     # Execution CWD (resolved relative)
 context_files: []        # Files passed as @-references
 max_retries: 3           # Total attempts (1=no retry, -1=unlimited)
 estimated_tokens: null   # Optional hint
+model: null              # Optional Claude model ID (e.g. claude-haiku-4-5-20251001)
 # Internal fields (managed by the queue, not user-edited):
 status: queued
 retry_count: 0
@@ -173,7 +174,7 @@ retry_not_before: null
 | Command | Purpose | Needs `claude` binary? |
 |---|---|---|
 | `start [--verbose] [--no-skip-permissions]` | Run queue loop | Yes |
-| `add <prompt> [-p priority]` | Quick-add prompt | No |
+| `add <prompt> [-p priority] [-m model]` | Quick-add prompt | No |
 | `template <name> [-p priority]` | Create template .md | No |
 | `status [--json] [--detailed]` | Queue stats | No |
 | `list [--status <s>] [--json]` | List prompts | No |
diff --git a/src/claude_code_queue/claude_interface.py b/src/claude_code_queue/claude_interface.py
index 19d8b3f..ebaad5c 100644
--- a/src/claude_code_queue/claude_interface.py
+++ b/src/claude_code_queue/claude_interface.py
@@ -304,6 +304,9 @@ def execute_prompt(self, prompt: QueuedPrompt) -> ExecutionResult:
                 if context_refs:
                     full_prompt = f"{' '.join(context_refs)} {prompt.content}"
 
+            if prompt.model is not None:
+                cmd.extend(["--model", prompt.model])
+
             cmd.append(full_prompt)
 
             # E1 — Use cwd= instead of os.chdir() to set the subprocess working directory.
diff --git a/src/claude_code_queue/cli.py b/src/claude_code_queue/cli.py
index 489fe42..b83539d 100644
--- a/src/claude_code_queue/cli.py
+++ b/src/claude_code_queue/cli.py
@@ -139,6 +139,9 @@ def main():
     add_parser.add_argument(
         "--estimated-tokens", "-t", type=int, help="Estimated token usage"
     )
+    add_parser.add_argument(
+        "--model", "-m", default=None, help="Claude model ID (e.g. claude-haiku-4-5-20251001)"
+    )
 
     template_parser = subparsers.add_parser(
         "template", help="Create a prompt template file"
@@ -322,6 +325,7 @@ def cmd_add(args) -> int:
         context_files=args.context_files,
         max_retries=args.max_retries,
         estimated_tokens=args.estimated_tokens,
+        model=args.model,
     )
     # Use _save_single_prompt directly rather than load_queue_state() +
     # save_queue_state(). Loading the full queue state just to append one file
@@ -543,6 +547,8 @@ def cmd_bank_list(args) -> int:
         print(f"   Working directory: {template['working_directory']}")
         if template['estimated_tokens']:
             print(f"   Estimated tokens: {template['estimated_tokens']}")
+        if template.get('model'):
+            print(f"   Model: {template['model']}")
         print(f"   Modified: {template['modified'].strftime('%Y-%m-%d %H:%M:%S')}")
         print()
 
diff --git a/src/claude_code_queue/models.py b/src/claude_code_queue/models.py
index 2282715..e4817e1 100644
--- a/src/claude_code_queue/models.py
+++ b/src/claude_code_queue/models.py
@@ -35,6 +35,7 @@ class QueuedPrompt:
     status: PromptStatus = PromptStatus.QUEUED
     execution_log: str = ""
     estimated_tokens: Optional[int] = None
+    model: Optional[str] = None
     last_executed: Optional[datetime] = None
     rate_limited_at: Optional[datetime] = None
     reset_time: Optional[datetime] = None
diff --git a/src/claude_code_queue/storage.py b/src/claude_code_queue/storage.py
index 44ce89f..50839f7 100644
--- a/src/claude_code_queue/storage.py
+++ b/src/claude_code_queue/storage.py
@@ -107,6 +107,11 @@ def parse_prompt_file(file_path: Path) -> Optional[QueuedPrompt]:
             except (ValueError, TypeError):
                 retry_count = 0
 
+            # R7 — Type-safe coercion for model. YAML parses `model: true` as bool and
+            # `model: 42` as int; subprocess.Popen requires all cmd elements to be str.
+            _raw_model = metadata.get("model")
+            _model = str(_raw_model) if _raw_model is not None else None
+
             prompt = QueuedPrompt(
                 id=prompt_id,
                 content=prompt_content,
@@ -117,6 +122,7 @@ def parse_prompt_file(file_path: Path) -> Optional[QueuedPrompt]:
                 max_retries=metadata.get("max_retries", 3),
                 retry_count=retry_count,
                 estimated_tokens=metadata.get("estimated_tokens"),
+                model=_model,
                 # R5 — Restore created_at from YAML; fall back to filesystem ctime.
                 # Using ctime alone causes created_at to drift when files are copied or
                 # their timestamps change. The YAML value is the authoritative source.
@@ -161,6 +167,8 @@ def write_prompt_file(prompt: QueuedPrompt, file_path: Path) -> bool:
                 metadata["context_files"] = prompt.context_files
             if prompt.estimated_tokens:
                 metadata["estimated_tokens"] = prompt.estimated_tokens
+            if prompt.model is not None:
+                metadata["model"] = prompt.model
             if prompt.last_executed:
                 metadata["last_executed"] = prompt.last_executed.isoformat()
             if prompt.rate_limited_at:
@@ -453,6 +461,7 @@ def create_prompt_template(self, filename: str, priority: int = 0) -> Path:
 context_files: []
 max_retries: 3
 estimated_tokens: null
+model: null
 ---
 
 # Prompt Title
@@ -504,6 +513,7 @@ def save_prompt_to_bank(self, template_name: str, priority: int = 0) -> Path:
 context_files: []
 max_retries: 3
 estimated_tokens: null
+model: null
 ---
 
 # {safe_name.replace('-', ' ').replace('_', ' ').title()}
@@ -568,6 +578,7 @@ def list_bank_templates(self) -> List[dict]:
                     'priority': metadata.get('priority', 0),
                     'working_directory': metadata.get('working_directory', '.'),
                     'estimated_tokens': metadata.get('estimated_tokens'),
+                    'model': metadata.get('model'),
                     'modified': datetime.fromtimestamp(file_path.stat().st_mtime)
                 })
 
diff --git a/tests/test_claude_interface.py b/tests/test_claude_interface.py
index 181ba24..6832d2e 100644
--- a/tests/test_claude_interface.py
+++ b/tests/test_claude_interface.py
@@ -307,6 +307,42 @@ def test_execute_prompt_includes_dangerously_skip_permissions(interface):  # CLI
         assert "--dangerously-skip-permissions" in args
 
 
+def test_execute_prompt_includes_model_flag_when_set(interface):  # CLI-060
+    """When prompt.model is set, --model <value> appears in the subprocess command."""
+    mock_proc = make_mock_proc()
+    with patch("subprocess.Popen", return_value=mock_proc) as mock_popen:
+        prompt = QueuedPrompt(content="task", model="claude-haiku-4-5-20251001")
+        interface.execute_prompt(prompt)
+        args = mock_popen.call_args[0][0]
+        assert "--model" in args
+        model_idx = args.index("--model")
+        assert args[model_idx + 1] == "claude-haiku-4-5-20251001"
+
+
+def test_execute_prompt_omits_model_flag_when_none(interface):  # CLI-061
+    """When prompt.model is None, no --model flag is added."""
+    mock_proc = make_mock_proc()
+    with patch("subprocess.Popen", return_value=mock_proc) as mock_popen:
+        prompt = QueuedPrompt(content="task", model=None)
+        interface.execute_prompt(prompt)
+        args = mock_popen.call_args[0][0]
+        assert "--model" not in args
+
+
+def test_execute_prompt_model_flag_before_prompt_arg(interface):  # CLI-062
+    """--model flag is placed before the positional prompt argument."""
+    mock_proc = make_mock_proc()
+    with patch("subprocess.Popen", return_value=mock_proc) as mock_popen:
+        prompt = QueuedPrompt(content="my task", model="claude-opus-4-6")
+        interface.execute_prompt(prompt)
+        args = mock_popen.call_args[0][0]
+        model_idx = args.index("--model")
+        prompt_idx = args.index("my task")
+        assert model_idx < prompt_idx, (
+            f"--model at {model_idx} must precede prompt at {prompt_idx}"
+        )
+
+
 def test_execute_prompt_success_returns_success_result(interface):  # CLI-026
     """returncode=0 with no rate-limit output → success=True."""
     mock_proc = make_mock_proc(returncode=0, stdout="All done", stderr="")
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 5a6cd06..02be5d8 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -55,6 +55,7 @@ def _make_template(
     priority=0,
     working_directory=".",
     estimated_tokens=None,
+    model=None,
     modified=None,
 ):
     """Build a bank-template dict like QueueStorage returns."""
@@ -66,6 +67,7 @@ def _make_template(
         "priority": priority,
         "working_directory": working_directory,
         "estimated_tokens": estimated_tokens,
+        "model": model,
         "modified": modified,
     }
 
@@ -233,6 +235,21 @@ def test_add_default_estimated_tokens_none(self):
         prompt = storage._save_single_prompt.call_args[0][0]
         assert prompt.estimated_tokens is None
 
+    def test_add_model_long_flag(self):
+        _, storage = self._run_add("--model", "claude-haiku-4-5-20251001")
+        prompt = storage._save_single_prompt.call_args[0][0]
+        assert prompt.model == "claude-haiku-4-5-20251001"
+
+    def test_add_model_short_flag(self):
+        _, storage = self._run_add("-m", "claude-sonnet-4-6")
+        prompt = storage._save_single_prompt.call_args[0][0]
+        assert prompt.model == "claude-sonnet-4-6"
+
+    def test_add_default_model_none(self):
+        _, storage = self._run_add()
+        prompt = storage._save_single_prompt.call_args[0][0]
+        assert prompt.model is None
+
     def test_add_returns_zero_on_success(self):
         code, _ = self._run_add(success=True)
         assert code == 0
@@ -704,6 +721,14 @@ def test_bank_list_omits_estimated_tokens_when_none(self, capsys):
         self._run_bank_list(templates=[_make_template(estimated_tokens=None)])
         assert "Estimated tokens" not in capsys.readouterr().out
 
+    def test_bank_list_shows_model_when_set(self, capsys):
+        self._run_bank_list(templates=[_make_template(model="claude-haiku-4-5-20251001")])
+        assert "claude-haiku-4-5-20251001" in capsys.readouterr().out
+
+    def test_bank_list_omits_model_when_none(self, capsys):
+        self._run_bank_list(templates=[_make_template(model=None)])
+        assert "Model:" not in capsys.readouterr().out
+
     def test_bank_list_shows_modified_timestamp(self, capsys):
         mod = datetime(2026, 3, 1, 10, 30, 0)
         self._run_bank_list(templates=[_make_template(modified=mod)])
diff --git a/tests/test_models.py b/tests/test_models.py
index eb5974a..5c83f75 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -49,6 +49,23 @@ def test_add_log_is_cumulative():  # MOD-003
     assert "third" in p.execution_log
 
 
+# ===========================================================================
+# QueuedPrompt — model field
+# ===========================================================================
+
+
+def test_prompt_model_default_is_none():  # MOD-029
+    """QueuedPrompt defaults model to None."""
+    p = QueuedPrompt(content="test")
+    assert p.model is None
+
+
+def test_prompt_model_accepts_string():  # MOD-030
+    """model field stores an arbitrary string model ID."""
+    p = QueuedPrompt(content="test", model="claude-haiku-4-5-20251001")
+    assert p.model == "claude-haiku-4-5-20251001"
+
+
 # ===========================================================================
 # QueuedPrompt — should_execute_now()
 # ===========================================================================
diff --git a/tests/test_storage.py b/tests/test_storage.py
index cb80804..fedcbbc 100644
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -386,6 +386,109 @@ def test_parse_estimated_tokens_null(tmp_path):  # STO-023
     assert prompt.estimated_tokens is None
 
 
+def test_parse_reads_model(tmp_path):  # STO-065
+    """model: claude-haiku-4-5-20251001 in frontmatter → prompt.model == that string."""
+    storage = QueueStorage(str(tmp_path))
+    file_path = storage.queue_dir / "abc12345-task.md"
+    file_path.write_text(
+        "---\npriority: 0\nworking_directory: .\nmax_retries: 3\n"
+        "model: claude-haiku-4-5-20251001\n"
+        "status: queued\nretry_count: 0\ncreated_at: 2025-01-01T00:00:00\n---\n\ncontent"
+    )
+    prompt = storage.parser.parse_prompt_file(file_path)
+    assert prompt is not None
+    assert prompt.model == "claude-haiku-4-5-20251001"
+
+
+def test_parse_model_null(tmp_path):  # STO-066
+    """model: null in frontmatter → prompt.model is None."""
+    storage = QueueStorage(str(tmp_path))
+    file_path = storage.queue_dir / "abc12345-task.md"
+    file_path.write_text(
+        "---\npriority: 0\nworking_directory: .\nmax_retries: 3\n"
+        "model: null\n"
+        "status: queued\nretry_count: 0\ncreated_at: 2025-01-01T00:00:00\n---\n\ncontent"
+    )
+    prompt = storage.parser.parse_prompt_file(file_path)
+    assert prompt is not None
+    assert prompt.model is None
+
+
+def test_parse_model_coerces_bool_to_string(tmp_path):  # STO-067
+    """model: true in YAML → str coercion → prompt.model == 'True' (R7)."""
+    storage = QueueStorage(str(tmp_path))
+    file_path = storage.queue_dir / "abc12345-task.md"
+    file_path.write_text(
+        "---\npriority: 0\nworking_directory: .\nmax_retries: 3\n"
+        "model: true\n"
+        "status: queued\nretry_count: 0\ncreated_at: 2025-01-01T00:00:00\n---\n\ncontent"
+    )
+    prompt = storage.parser.parse_prompt_file(file_path)
+    assert prompt is not None
+    assert prompt.model == "True"
+
+
+def test_parse_model_coerces_int_to_string(tmp_path):  # STO-068
+    """model: 42 in YAML → str coercion → prompt.model == '42' (R7)."""
+    storage = QueueStorage(str(tmp_path))
+    file_path = storage.queue_dir / "abc12345-task.md"
+    file_path.write_text(
+        "---\npriority: 0\nworking_directory: .\nmax_retries: 3\n"
+        "model: 42\n"
+        "status: queued\nretry_count: 0\ncreated_at: 2025-01-01T00:00:00\n---\n\ncontent"
+    )
+    prompt = storage.parser.parse_prompt_file(file_path)
+    assert prompt is not None
+    assert prompt.model == "42"
+
+
+def test_model_roundtrip_write_then_parse(tmp_path):  # STO-069
+    """model survives write_prompt_file → parse_prompt_file round-trip."""
+    storage = QueueStorage(str(tmp_path))
+    prompt = QueuedPrompt(id="abc12345", content="task", model="claude-opus-4-6")
+    file_path = storage.queue_dir / "abc12345-task.md"
+    storage.parser.write_prompt_file(prompt, file_path)
+    parsed = storage.parser.parse_prompt_file(file_path)
+    assert parsed is not None
+    assert parsed.model == "claude-opus-4-6"
+
+
+def test_model_none_roundtrip_write_then_parse(tmp_path):  # STO-070
+    """model=None survives write → parse round-trip (field omitted from YAML)."""
+    storage = QueueStorage(str(tmp_path))
+    prompt = QueuedPrompt(id="abc12345", content="task", model=None)
+    file_path = storage.queue_dir / "abc12345-task.md"
+    storage.parser.write_prompt_file(prompt, file_path)
+    parsed = storage.parser.parse_prompt_file(file_path)
+    assert parsed is not None
+    assert parsed.model is None
+
+
+def test_create_prompt_template_includes_model_field(tmp_path):  # STO-071
+    """create_prompt_template() output includes 'model: null' in frontmatter."""
+    storage = QueueStorage(str(tmp_path))
+    file_path = storage.create_prompt_template("my-task")
+    content = file_path.read_text()
+    assert "model: null" in content
+
+
+def test_save_prompt_to_bank_includes_model_field(tmp_path):  # STO-072
+    """save_prompt_to_bank() output includes 'model: null' in frontmatter."""
+    storage = QueueStorage(str(tmp_path))
+    file_path = storage.save_prompt_to_bank("my-template")
+    content = file_path.read_text()
+    assert "model: null" in content
+
+
+def test_bank_list_includes_model_key(tmp_path):  # STO-073
+    """list_bank_templates() dicts include a 'model' key."""
+    storage = QueueStorage(str(tmp_path))
+    storage.save_prompt_to_bank("my-template")
+    templates = storage.list_bank_templates()
+    assert len(templates) == 1
+    assert "model" in templates[0]
+
+
 def test_parse_defaults_when_keys_missing(tmp_path):  # STO-024
     """Minimal frontmatter → defaults: priority=0, max_retries=3, context_files=[],
     estimated_tokens=None.

From 56454767672b29b06ea16ad14670636e6f5c0112 Mon Sep 17 00:00:00 2001
From: Matthew Tibbits <mtibbits@users.noreply.github.com>
Date: Tue, 31 Mar 2026 05:06:09 +0000
Subject: [PATCH 5/6] feat: add batch-wizard skill for guided batch job
 creation

Ten-phase interactive workflow that walks users through scoping,
target discovery, prompt design, variable extraction, CSV generation,
priority config, dry-run generation, adversarial red-team review,
token efficiency optimization, and launch. Includes a visual progress
tracker at each phase transition.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Matthew Tibbits <mtibbits@users.noreply.github.com>
---
 skills/batch-wizard/SKILL.md | 277 +++++++++++++++++++++++++++++++++++
 1 file changed, 277 insertions(+)
 create mode 100644 skills/batch-wizard/SKILL.md

diff --git a/skills/batch-wizard/SKILL.md b/skills/batch-wizard/SKILL.md
new file mode 100644
index 0000000..72e6f4f
--- /dev/null
+++ b/skills/batch-wizard/SKILL.md
@@ -0,0 +1,277 @@
+---
+name: batch-wizard
+description: >
+  Guided multi-phase workflow to design, red-team, optimize, and generate
+  a batch of claude-queue jobs. Walks the user through scoping, target
+  discovery, prompt design, variable extraction, CSV generation, priority
+  config, job generation, adversarial review, token efficiency, and launch.
+  Triggers on: "create a batch", "design queue jobs", "batch wizard",
+  "plan a batch run", "generate jobs for", "queue a bunch of",
+  "batch workflow".
+allowed-tools: [Bash, Read, Glob, Grep, Write, Edit, Agent]
+argument-hint: "[project path or short description of the work]"
+disable-model-invocation: false
+---
+
+# Batch Job Wizard
+
+Guide the user through designing and generating a batch of claude-queue
+jobs. Follow each phase in order. **Do NOT skip phases** unless the user
+explicitly asks to. Ask for confirmation before advancing to the next
+phase.
+
+**Flexible entry:** If the user already has a template, CSV, or partial
+work, acknowledge what exists and pick up from the appropriate phase
+rather than forcing them to restart.
+
+## Progress Tracker
+
+At the start of **every phase** (including Phase 1), print the progress
+tracker below. Mark completed phases with `[x]`, the current phase with
+`-->`, and future phases with `[ ]`. This gives the user a visual map of
+where they are and what decisions are coming.
+
+```
+Batch Wizard Progress:
+  [x]  1. Scope
+  [x]  2. Target Discovery
+  -->  3. Prompt Design
+  [ ]  4. Template Variables
+  [ ]  5. CSV Generation
+  [ ]  6. Priority & Configuration
+  [ ]  7. Generate (Dry Run)
+  [ ]  8. Red Team Review
+  [ ]  9. Token Efficiency Review
+  [ ] 10. Review & Launch
+```
+
+When re-entering a phase (e.g., looping back from Phase 8 to revise the
+template), mark the revisited phase with `-->` and keep earlier phases
+as `[x]`. Already-completed later phases stay `[x]` unless the revision
+invalidates their output, in which case they revert to `[ ]`.
+
+---
+
+## Phase 1: Scope
+
+Goal: Understand what the user wants to accomplish and where.
+
+- Ask: What project/directory are these jobs for?
+- Ask: What is the goal? (refactor, review, documentation, tests, migration, etc.)
+- Ask: Roughly how many targets do you expect?
+- Explore the project briefly (read CLAUDE.md, scan directory structure) to
+  build context for later phases.
+
+Confirm scope before proceeding.
+
+---
+
+## Phase 2: Target Discovery
+
+Goal: Build the concrete list of targets (files, functions, modules, etc.)
+that each job will operate on.
+
+- Based on Phase 1, use Glob/Grep/Agent to enumerate candidates.
+- Present the list to the user. Include the count.
+- Ask: Should any targets be excluded? Are there any missing?
+- Finalize the target list.
+
+---
+
+## Phase 3: Prompt Design
+
+Goal: Craft a high-quality prompt for one representative target.
+
+- Pick a representative target (ideally one of medium complexity).
+- Draft a complete prompt — title, context, step-by-step instructions,
+  expected output — as it would appear in a queue `.md` file body.
+- Show it to the user for feedback.
+- Iterate until they approve the prompt.
+
+**Tip:** Write the prompt as if addressing a capable colleague who has
+never seen this codebase and has no conversation history. Be specific
+about what to read, what to change, and what to verify.
+
+---
+
+## Phase 4: Template Variables
+
+Goal: Parameterize the approved prompt so it works across all targets.
+
+- Identify which parts of the prompt vary per target (filenames, paths,
+  function names, module names, etc.).
+- Replace them with `{{variable}}` placeholders.
+- Show the user the parameterized template side-by-side with the original
+  to confirm nothing was lost.
+- List the variables and their meanings.
+
+---
+
+## Phase 5: CSV Generation
+
+Goal: Produce the data file that drives batch generation.
+
+- Generate a CSV (or TSV) with one row per target and columns matching
+  each `{{variable}}`.
+- Show a preview of the first 5 and last 2 rows for confirmation.
+- Report the total row count and verify it matches the Phase 2 target list.
+
+---
+
+## Phase 6: Priority & Configuration
+
+Goal: Set the YAML frontmatter values for the batch.
+
+- Ask about or recommend:
+  - `priority` / `--base-priority` / `--priority-step` — explain that
+    without `--base-priority`, all jobs get the same priority and
+    execution order becomes non-deterministic.
+  - `model` — whether a specific model is needed or the default suffices.
+  - `max_retries` — recommend `-1` (unlimited) for idempotent tasks,
+    `3` for tasks with side effects.
+  - `working_directory` — confirm the absolute path.
+  - `context_files` — any files every job should have loaded.
+- Show the complete frontmatter block for approval.
+
+---
+
+## Phase 7: Generate (Dry Run)
+
+Goal: Write the template and CSV, validate, and preview before committing.
+
+- Write the template to `~/.claude-queue/bank/<name>.md`
+- Write the CSV alongside it or to a temp location.
+- Run: `claude-queue batch validate <name> --data <csv>`
+- Run: `claude-queue batch generate <name> --data <csv> --base-priority <N> [--priority-step <S>] --dry-run`
+- Show the dry-run output for review.
+- Ask: Does everything look right?
+
+---
+
+## Phase 8: Red Team Review
+
+Goal: Stress-test the prompt and batch design before committing real
+compute time. Each job will run in a **clean context window** with no
+memory of this conversation, so the prompt must stand completely on its
+own.
+
+Walk through each of the following questions with the user. For each one,
+explain *why* it matters — not just the question but the failure mode it
+prevents.
+
+### 8a. Scope & Guardrails
+
+> Could Claude, starting from a blank context with only this prompt,
+> wander off into a rabbit hole and never return useful output?
+
+Common failure modes:
+- Vague verbs ("improve", "refactor", "clean up") without success criteria
+- No explicit boundary on what files/directories to touch
+- No instruction to stop and report rather than guess when uncertain
+
+If the answer is yes, suggest adding scoping guardrails to the prompt.
+Acknowledge the tradeoff: tighter scope limits creativity, but
+well-guided execution has a higher probability of producing useful output
+across dozens of jobs.
+
+### 8b. Hidden Assumptions
+
+> Are there things you know about this project — conventions, gotchas,
+> recent decisions, tribal knowledge — that the prompt doesn't mention?
+
+Think of it this way: if you handed this prompt to a competent intern on
+their first day, what context would you need to give them verbally?
+That context should be in the prompt.
+
+Examples: "we use tabs not spaces", "don't modify the generated files in
+`build/`", "the `_legacy` suffix means do-not-touch", "PR titles must
+follow conventional commits".
+
+### 8c. Parallelism & Dependencies
+
+> Can these jobs run in parallel, or does one job's output affect another?
+
+`claude-queue` currently executes one job at a time, but this may change
+in the future. Even with serial execution, consider:
+- Does job N modify a file that job N+1 also reads? (merge conflicts)
+- Does job order matter? (e.g., creating an interface before implementing it)
+- Should certain jobs be grouped at a higher priority to run first?
+
+If dependencies exist, discuss whether to split into separate batches
+with different base priorities or add explicit ordering.
+
+### 8d. Idempotency
+
+> If a job runs twice (due to crash recovery or a retry), will the second
+> run produce a broken result?
+
+`claude-queue` has at-least-once semantics. If the daemon crashes
+mid-execution, the job will re-run. Flag any jobs that create resources,
+send messages, open PRs, or make API calls — these need idempotency
+guards in the prompt (e.g., "check if the PR already exists before
+creating one").
+
+After this review, offer to revise the template. If changes are made,
+re-run the dry run from Phase 7 to confirm.
+
+---
+
+## Phase 9: Token Efficiency Review
+
+Goal: Minimize wasted tokens across the batch without sacrificing quality.
+Every inefficiency is multiplied by the number of jobs.
+
+Walk through these considerations with the user:
+
+### 9a. Context Files
+
+- Are the `context_files` in frontmatter actually needed by every job?
+  Files loaded via `context_files` consume input tokens on every run.
+- Could some context be inlined in the prompt instead (a 3-line snippet
+  vs. loading a 500-line file)?
+- Conversely, does the prompt ask Claude to "read file X" when X could
+  be listed in `context_files` instead (saving a tool-call round trip)?
+
+### 9b. Prompt Verbosity
+
+- Is the prompt longer than it needs to be? Look for:
+  - Repeated instructions (said two different ways)
+  - Excessive examples (one clear example beats three)
+  - Boilerplate that adds no information
+- Every extra token in the prompt is multiplied by the job count. For a
+  batch of 100 jobs, trimming 500 tokens from the prompt saves 50,000
+  input tokens.
+
+### 9c. Model Selection
+
+- Does every job need the most capable (and most expensive) model?
+- Could a smaller/faster model handle straightforward tasks (e.g.,
+  simple renames, formatting fixes) while reserving the larger model for
+  complex reasoning?
+- Remind the user that `model:` can be set per-template in frontmatter.
+
+### 9d. Output Scope
+
+- Does the prompt constrain what Claude outputs? Without guidance, Claude
+  may produce verbose explanations, summaries, or commentary that consume
+  output tokens without adding value.
+- Consider adding: "Do not explain your changes. Just make them." or
+  "Keep your response under 200 words" where appropriate.
+
+Summarize the estimated token impact of the suggested changes (a rough
+order of magnitude is fine). Offer to revise the template.
+
+---
+
+## Phase 10: Review & Launch
+
+Goal: Final review and optional queue start.
+
+- Run: `claude-queue batch generate <name> --data <csv> --base-priority <N> [--priority-step <S>]`
+- Run: `claude-queue status --detailed` — show what will execute.
+- Report: total job count, estimated run time (based on ~1-2 min/job for
+  typical prompts), priority ordering.
+- Ask: Ready to start? Or do you want to review individual job files first?
+- If the user says go: `claude-queue start`
+- Remind the user they can monitor progress with `claude-queue status`
+  and cancel individual jobs with `claude-queue cancel <id>`.

From 22aa4ab9333b5c36f4badf59ea0d47dd4058009f Mon Sep 17 00:00:00 2001
From: Matthew Tibbits <mtibbits@users.noreply.github.com>
Date: Tue, 31 Mar 2026 05:10:53 +0000
Subject: [PATCH 6/6] fix: address red-team review feedback for batch-wizard
 skill

- Move skill to src/claude_code_queue/skills/batch-wizard/ to match
  existing queue skill location and package data glob
- Update install-skill CLI to discover and install all bundled skills,
  with optional skill_name argument for selective install
- Remove invalid allowed-tools frontmatter field
- Use explicit CSV path (~/.claude-queue/bank/) instead of vague location
- Fix speculative language in Phase 8c parallelism section
- Widen time estimate in Phase 10 to ~1-3 min/job

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Matthew Tibbits <mtibbits@users.noreply.github.com>
---
 src/claude_code_queue/cli.py                  | 50 +++++++++++++------
 .../skills}/batch-wizard/SKILL.md             | 12 ++---
 2 files changed, 40 insertions(+), 22 deletions(-)
 rename {skills => src/claude_code_queue/skills}/batch-wizard/SKILL.md (96%)

diff --git a/src/claude_code_queue/cli.py b/src/claude_code_queue/cli.py
index 2c24b70..8a60331 100644
--- a/src/claude_code_queue/cli.py
+++ b/src/claude_code_queue/cli.py
@@ -239,10 +239,16 @@ def main():
 
     # Install skill subcommand
     install_skill_parser = subparsers.add_parser(
-        "install-skill", help="Install the Claude Code skill to ~/.claude/skills/"
+        "install-skill", help="Install Claude Code skills to ~/.claude/skills/"
     )
     install_skill_parser.add_argument(
-        "--force", action="store_true", help="Overwrite existing skill file"
+        "--force", action="store_true", help="Overwrite existing skill files"
+    )
+    install_skill_parser.add_argument(
+        "skill_name",
+        nargs="?",
+        default=None,
+        help="Install a specific skill (e.g. 'queue', 'batch-wizard'). Installs all if omitted.",
     )
 
     # Cleanup subcommand
@@ -719,23 +725,35 @@ def cmd_batch_variables(args) -> int:
 
 
 def cmd_install_skill(args) -> int:
-    """Install the Claude Code skill file to ~/.claude/skills/queue/SKILL.md."""
-    dest = Path.home() / ".claude" / "skills" / "queue" / "SKILL.md"
-    skill_src = Path(__file__).parent / "skills" / "queue" / "SKILL.md"
+    """Install Claude Code skill files to ~/.claude/skills/."""
+    skills_pkg_dir = Path(__file__).parent / "skills"
+    available = [d.name for d in skills_pkg_dir.iterdir() if d.is_dir() and (d / "SKILL.md").exists()]
 
-    if not skill_src.exists():
-        print("Error: bundled SKILL.md not found in package installation.")
-        return 1
+    if args.skill_name:
+        if args.skill_name not in available:
+            print(f"Error: unknown skill '{args.skill_name}'. Available: {', '.join(sorted(available))}")
+            return 1
+        to_install = [args.skill_name]
+    else:
+        to_install = sorted(available)
 
-    if dest.exists() and not args.force:
-        print(f"Skill already installed at {dest}")
-        print("Use --force to overwrite.")
-        return 1
+    errors = 0
+    for name in to_install:
+        skill_src = skills_pkg_dir / name / "SKILL.md"
+        dest = Path.home() / ".claude" / "skills" / name / "SKILL.md"
+
+        if dest.exists() and not args.force:
+            print(f"  Skill '{name}' already installed at {dest} (use --force to overwrite)")
+            errors += 1
+            continue
+
+        dest.parent.mkdir(parents=True, exist_ok=True)
+        dest.write_text(skill_src.read_text(encoding="utf-8"), encoding="utf-8")
+        print(f"  Installed '{name}' to {dest}")
 
-    dest.parent.mkdir(parents=True, exist_ok=True)
-    dest.write_text(skill_src.read_text(encoding="utf-8"), encoding="utf-8")
-    print(f"Skill installed to {dest}")
-    print("Restart Claude Code for the /queue skill to become available.")
+    if errors:
+        return 1
+    print("Restart Claude Code for skills to become available.")
     return 0
 
 
diff --git a/skills/batch-wizard/SKILL.md b/src/claude_code_queue/skills/batch-wizard/SKILL.md
similarity index 96%
rename from skills/batch-wizard/SKILL.md
rename to src/claude_code_queue/skills/batch-wizard/SKILL.md
index 72e6f4f..0709963 100644
--- a/skills/batch-wizard/SKILL.md
+++ b/src/claude_code_queue/skills/batch-wizard/SKILL.md
@@ -8,7 +8,6 @@ description: >
   Triggers on: "create a batch", "design queue jobs", "batch wizard",
   "plan a batch run", "generate jobs for", "queue a bunch of",
   "batch workflow".
-allowed-tools: [Bash, Read, Glob, Grep, Write, Edit, Agent]
 argument-hint: "[project path or short description of the work]"
 disable-model-invocation: false
 ---
@@ -140,7 +139,7 @@ Goal: Set the YAML frontmatter values for the batch.
 Goal: Write the template and CSV, validate, and preview before committing.
 
 - Write the template to `~/.claude-queue/bank/<name>.md`
-- Write the CSV alongside it or to a temp location.
+- Write the CSV to `~/.claude-queue/bank/<name>.csv`
 - Run: `claude-queue batch validate <name> --data <csv>`
 - Run: `claude-queue batch generate <name> --data <csv> --base-priority <N> [--priority-step <S>] --dry-run`
 - Show the dry-run output for review.
@@ -191,8 +190,9 @@ follow conventional commits".
 
 > Can these jobs run in parallel, or does one job's output affect another?
 
-`claude-queue` currently executes one job at a time, but this may change
-in the future. Even with serial execution, consider:
+`claude-queue` currently executes one job at a time. If parallel
+execution is added later, dependency issues become critical. Even with
+serial execution, consider:
 - Does job N modify a file that job N+1 also reads? (merge conflicts)
 - Does job order matter? (e.g., creating an interface before implementing it)
 - Should certain jobs be grouped at a higher priority to run first?
@@ -269,8 +269,8 @@ Goal: Final review and optional queue start.
 
 - Run: `claude-queue batch generate <name> --data <csv> --base-priority <N> [--priority-step <S>]`
 - Run: `claude-queue status --detailed` — show what will execute.
-- Report: total job count, estimated run time (based on ~1-2 min/job for
-  typical prompts), priority ordering.
+- Report: total job count, estimated run time (based on ~1-3 min/job for
+  typical prompts, longer for complex multi-file tasks), priority ordering.
 - Ask: Ready to start? Or do you want to review individual job files first?
 - If the user says go: `claude-queue start`
 - Remind the user they can monitor progress with `claude-queue status`