From bdb72147632bcae2cef3c194e6fa9b6537e933ec Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Apr 2026 00:33:29 +0000
Subject: [PATCH 01/10] Add playground task workspace scaffolding

Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464

Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com>
---
 .github/workflows/hats-task.yml |  62 ++++++-
 FORK_SETUP.md                   |  23 ++-
 README.md                       |  11 +-
 scripts/hat                     |  20 +-
 scripts/hats_task_runner.py     | 313 +++++++++++++++++++++++++++++++-
 5 files changed, 407 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml
index 4e781b6..8029ac3 100644
--- a/.github/workflows/hats-task.yml
+++ b/.github/workflows/hats-task.yml
@@ -61,6 +61,26 @@ on:
         required: false
         type: string
         default: ""
+      category:
+        description: "Playground category (default: inferred from task type)"
+        required: false
+        type: string
+        default: ""
+      genre:
+        description: "Playground genre/type bucket"
+        required: false
+        type: string
+        default: ""
+      project:
+        description: "Playground project slug"
+        required: false
+        type: string
+        default: ""
+      workspace_root:
+        description: "Sandbox workspace root on the runner"
+        required: false
+        type: string
+        default: "/tmp/hats-playground"
 
 permissions:
   contents: read
@@ -94,6 +114,10 @@ jobs:
             echo "callback_repo=${{ inputs.callback_repo }}" >> "$GITHUB_OUTPUT"
             echo "callback_pr=${{ inputs.callback_pr }}" >> "$GITHUB_OUTPUT"
             echo "callback_issue=${{ inputs.callback_issue }}" >> "$GITHUB_OUTPUT"
+            echo "category=${{ inputs.category }}" >> "$GITHUB_OUTPUT"
+            echo "genre=${{ inputs.genre }}" >> "$GITHUB_OUTPUT"
+            echo "project=${{ inputs.project }}" >> "$GITHUB_OUTPUT"
+            echo "workspace_root=${{ inputs.workspace_root }}" >> "$GITHUB_OUTPUT"
             # Prompt may contain special chars — pass via env to avoid code injection
             INPUT_PROMPT="${{ inputs.prompt }}" python3 -c "
           import os, re
@@ -126,8 +150,12 @@ jobs:
               f.write(f\"callback_repo={sanitize(payload.get('callback_repo', ''))}\n\")
               f.write(f\"callback_pr={sanitize(payload.get('callback_pr', ''))}\n\")
               f.write(f\"callback_issue={sanitize(payload.get('callback_issue', ''))}\n\")
+              f.write(f\"category={sanitize(payload.get('category', ''))}\n\")
+              f.write(f\"genre={sanitize(payload.get('genre', ''))}\n\")
+              f.write(f\"project={sanitize(payload.get('project', ''))}\n\")
+              f.write(f\"workspace_root={sanitize(payload.get('workspace_root', '/tmp/hats-playground'))}\n\")
               f.write(f\"context={sanitize(payload.get('context', ''))}\n\")
-          "
+           "
           fi
 
       - name: Fetch context from callback repo
@@ -175,14 +203,29 @@ jobs:
             --task "${{ steps.params.outputs.task }}"
             --prompt "${{ steps.params.outputs.prompt }}"
             --config scripts/hat_configs.yml
-            --output /tmp/hats-task-output
             --json-file /tmp/hats-task-result.json
+            --workspace-root "${{ steps.params.outputs.workspace_root }}"
+            --source-repo "${{ steps.params.outputs.callback_repo }}"
+            --source-pr "${{ steps.params.outputs.callback_pr }}"
+            --source-issue "${{ steps.params.outputs.callback_issue }}"
           )
 
           if [ -n "${{ steps.params.outputs.hats }}" ]; then
             ARGS+=(--hats "${{ steps.params.outputs.hats }}")
           fi
 
+          if [ -n "${{ steps.params.outputs.category }}" ]; then
+            ARGS+=(--category "${{ steps.params.outputs.category }}")
+          fi
+
+          if [ -n "${{ steps.params.outputs.genre }}" ]; then
+            ARGS+=(--genre "${{ steps.params.outputs.genre }}")
+          fi
+
+          if [ -n "${{ steps.params.outputs.project }}" ]; then
+            ARGS+=(--project "${{ steps.params.outputs.project }}")
+          fi
+
           if [ -d "/tmp/hats-context" ] && [ "$(ls -A /tmp/hats-context 2>/dev/null)" ]; then
             ARGS+=(--context-dir /tmp/hats-context)
           fi
@@ -211,6 +254,7 @@ jobs:
           CALLBACK_PR="${{ steps.params.outputs.callback_pr }}"
           CALLBACK_ISSUE="${{ steps.params.outputs.callback_issue }}"
           TARGET="${CALLBACK_PR:-$CALLBACK_ISSUE}"
+          OUTPUT_DIR="${{ steps.run.outputs.output_dir }}"
 
           if [ -z "$GH_TOKEN" ] || [ -z "$TARGET" ]; then
             echo "⚠️ Cannot post results — missing token or target"
@@ -218,8 +262,8 @@ jobs:
           fi
 
           # Build comment from summary
-          if [ -f /tmp/hats-task-output/HATS_TASK_SUMMARY.md ]; then
-            REPORT_BODY=$(cat /tmp/hats-task-output/HATS_TASK_SUMMARY.md)
+          if [ -n "$OUTPUT_DIR" ] && [ -f "$OUTPUT_DIR/HATS_TASK_SUMMARY.md" ]; then
+            REPORT_BODY=$(cat "$OUTPUT_DIR/HATS_TASK_SUMMARY.md")
           else
             REPORT_BODY="🎩 Hats Task completed. Files generated: ${{ steps.run.outputs.files_generated }}"
           fi
@@ -241,7 +285,15 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: hats-task-output
-          path: /tmp/hats-task-output/
+          path: ${{ steps.run.outputs.output_dir }}/
+          retention-days: 30
+
+      - name: Upload playground workspace
+        if: always() && steps.run.outputs.workspace_root != ''
+        uses: actions/upload-artifact@v4
+        with:
+          name: hats-playground
+          path: ${{ steps.run.outputs.workspace_root }}/
           retention-days: 30
 
       - name: Upload JSON result
diff --git a/FORK_SETUP.md b/FORK_SETUP.md
index 262a4d8..d703011 100644
--- a/FORK_SETUP.md
+++ b/FORK_SETUP.md
@@ -179,7 +179,7 @@ export HAT_STACK_REPO="YOUR_USERNAME/hat_stack"
 ```bash
 # Generate a new code module — results posted as a PR comment
 hat task generate_code "Build a FastAPI auth module with JWT and refresh tokens" \
-  --repo myorg/myapp --pr 42
+  --repo myorg/myapp --pr 42 --category code --genre api --project auth-service
 
 # Write documentation for an endpoint
 hat task generate_docs "Write API documentation for the /users endpoints" \
@@ -187,7 +187,7 @@ hat task generate_docs "Write API documentation for the /users endpoints" \
 
 # Plan a migration
 hat task plan "Plan a migration from REST to GraphQL for the orders service" \
-  --repo myorg/myapp
+  --repo myorg/myapp --category plans --genre migration --project orders-service
 
 # Generate tests for a module
 hat task test "Write unit tests for auth.py covering edge cases and error paths" \
@@ -210,7 +210,24 @@ hat status
 3. The task runner selects the right hats and models for the job
 4. Primary hat generates the deliverable, supporting hats review/enhance it
 5. Gold Hat does final QA
-6. Results are posted back to your project's PR/issue as a comment
+6. Results are written into a sandboxed playground tree on the runner using `category/genre/project/run-id`
+7. Results are posted back to your project's PR/issue as a comment and uploaded as artifacts
+
+**Playground layout:**
+
+```text
+/tmp/hats-playground/
+└── <category>/
+    └── <genre>/
+        └── <project>/
+            └── <run-id>/
+                ├── generated files...
+                ├── HATS_TASK_SUMMARY.md
+                ├── hats_task_result.json
+                └── PLAYGROUND_MANIFEST.json
+```
+
+If you do not pass `--category`, `--genre`, or `--project`, Hat Stack infers sensible defaults and creates the folders automatically.
 
 **For Copilot in VS Code:** Your Copilot agent can shell out to `hat task ...` commands. The `gh` CLI handles auth, and hat_stack handles execution. Your Copilot agent gives the instruction, hat_stack's model pool does the heavy lifting, results come back to the PR.
 
diff --git a/README.md b/README.md
index e712b50..a092f3c 100644
--- a/README.md
+++ b/README.md
@@ -190,7 +190,8 @@ cp scripts/hat /usr/local/bin/hat   # or add scripts/ to PATH
 export HAT_STACK_REPO="YOUR_USERNAME/hat_stack"
 
 # Generate code
-hat task generate_code "Build a FastAPI auth module with JWT" --repo myorg/app --pr 42
+hat task generate_code "Build a FastAPI auth module with JWT" \
+  --repo myorg/app --pr 42 --category code --genre api --project auth-service
 
 # Write documentation
 hat task generate_docs "Write API docs for /users endpoints" --repo myorg/app --issue 10
@@ -208,6 +209,13 @@ hat task analyze "Security audit of payment processing" --repo myorg/payments
 git diff main | hat review - --repo myorg/app --pr 123
 ```
 
+Task runs now support a structured playground sandbox on the runner:
+
+- Default workspace root: `/tmp/hats-playground`
+- Layout: `<workspace>/<category>/<genre>/<project>/<run-id>/`
+- Contents: generated files, `HATS_TASK_SUMMARY.md`, `hats_task_result.json`, `PLAYGROUND_MANIFEST.json`
+- Persistence: both the run output and the full playground tree are uploaded as workflow artifacts
+
 Or dispatch directly via `gh` CLI (what your Copilot agent would call):
 
 ```bash
@@ -275,4 +283,3 @@ hat_stack/
 ## License
 
 MIT — See [LICENSE](LICENSE).
-
diff --git a/scripts/hat b/scripts/hat
index 5b978ad..ce45814 100755
--- a/scripts/hat
+++ b/scripts/hat
@@ -9,7 +9,8 @@
 #
 # Usage:
 #   hat review <diff_file> [--repo owner/repo] [--pr 42]
-#   hat task <task_type> "<prompt>" [--repo owner/repo] [--pr 42] [--hats black,green]
+#   hat task <task_type> "<prompt>" [--repo owner/repo] [--pr 42] [--hats black,green] \
+#     [--category code] [--genre api] [--project auth-service]
 #   hat status [run_id]
 #   hat list-tasks
 #
@@ -19,7 +20,7 @@
 #
 #   # Generate a new module
 #   hat task generate_code "Build a FastAPI auth module with JWT and refresh tokens" \
-#     --repo myorg/myapp --pr 123
+#     --repo myorg/myapp --pr 123 --category code --genre api --project auth-service
 #
 #   # Write docs for an existing module
 #   hat task generate_docs "Write API documentation for the /users endpoints" \
@@ -174,6 +175,7 @@ cmd_task() {
   local task_type="$1"; shift
   local prompt="$1"; shift
   local callback_repo="" callback_pr="" callback_issue="" hats="" context=""
+  local category="" genre="" project=""
 
   while [[ $# -gt 0 ]]; do
     case "$1" in
@@ -182,6 +184,9 @@ cmd_task() {
       --issue)   callback_issue="$2"; shift 2 ;;
       --hats)    hats="$2"; shift 2 ;;
       --context) context="$2"; shift 2 ;;
+      --category) category="$2"; shift 2 ;;
+      --genre)    genre="$2"; shift 2 ;;
+      --project)  project="$2"; shift 2 ;;
       *) die "Unknown option: $1" ;;
     esac
   done
@@ -199,6 +204,7 @@ cmd_task() {
   payload=$(HAT_TASK="$task_type" HAT_PROMPT="$prompt" \
     HAT_CB_REPO="$callback_repo" HAT_CB_PR="$callback_pr" \
     HAT_CB_ISSUE="$callback_issue" HAT_HATS="$hats" HAT_CTX="$context" \
+    HAT_CATEGORY="$category" HAT_GENRE="$genre" HAT_PROJECT="$project" \
     python3 -c "
 import json, os
 payload = {
@@ -209,6 +215,9 @@ payload = {
     'callback_issue': os.environ.get('HAT_CB_ISSUE', ''),
     'hats': os.environ.get('HAT_HATS', ''),
     'context': os.environ.get('HAT_CTX', ''),
+    'category': os.environ.get('HAT_CATEGORY', ''),
+    'genre': os.environ.get('HAT_GENRE', ''),
+    'project': os.environ.get('HAT_PROJECT', ''),
 }
 payload = {k: v for k, v in payload.items() if v}
 print(json.dumps(payload))
@@ -250,7 +259,7 @@ cmd_list_tasks() {
   echo ""
   echo -e "${BOLD}Usage:${NC}"
   echo '  hat task generate_code "Build a user auth module" --repo myorg/myapp --pr 42'
-  echo '  hat task plan "Plan migration to microservices" --repo myorg/monolith'
+  echo '  hat task plan "Plan migration to microservices" --repo myorg/monolith --category plans --genre migration --project orders'
 }
 
 cmd_help() {
@@ -269,10 +278,13 @@ cmd_help() {
   echo "  --issue <number>       Issue to post results to"
   echo "  --hats <hat1,hat2>     Specific hats to use"
   echo "  --context <text>       Additional context"
+  echo "  --category <name>      Playground category label"
+  echo "  --genre <name>         Playground genre/type label"
+  echo "  --project <name>       Playground project label"
   echo ""
   echo -e "${BOLD}Examples:${NC}"
   echo '  git diff main | hat review - --repo myorg/app --pr 42'
-  echo '  hat task generate_code "Build JWT auth module" --repo myorg/app --pr 42'
+  echo '  hat task generate_code "Build JWT auth module" --repo myorg/app --pr 42 --category code --genre api --project auth'
   echo '  hat task generate_docs "Write API docs for /users" --repo myorg/app --issue 10'
   echo '  hat task plan "Plan GraphQL migration" --repo myorg/app'
   echo '  hat task analyze "Security audit of payments" --repo myorg/payments'
diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py
index 13bc9e4..41b88c3 100644
--- a/scripts/hats_task_runner.py
+++ b/scripts/hats_task_runner.py
@@ -32,6 +32,7 @@
 import argparse
 import json
 import os
+import re
 import sys
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -89,6 +90,15 @@
     },
 }
 
+DEFAULT_CATEGORIES = {
+    "generate_code": "code",
+    "generate_docs": "docs",
+    "refactor": "code",
+    "analyze": "analysis",
+    "plan": "plans",
+    "test": "tests",
+}
+
 # ---------------------------------------------------------------------------
 # Task-mode system prompts — transform hats from reviewers to builders
 # ---------------------------------------------------------------------------
@@ -136,6 +146,210 @@ def load_config(config_path: str | Path) -> dict:
         return yaml.safe_load(fh)
 
 
+def slugify_path_component(value: str | None, default: str) -> str:
+    """Slugify a path component to [a-z0-9._-]; fall back to default when the result is empty."""
+    text = (value or "").strip().lower()
+    text = re.sub(r"[^a-z0-9._-]+", "-", text)
+    text = re.sub(r"-{2,}", "-", text).strip(".-")
+    return text or default
+
+
+def infer_project_slug(source_repo: str | None, task_type: str) -> str:
+    """Slugify the repo name (text after the last "/") when given, otherwise the task type."""
+    if source_repo:
+        repo_name = source_repo.rsplit("/", 1)[-1]
+        return slugify_path_component(repo_name, "project")
+    return slugify_path_component(task_type, "adhoc")
+
+
+def build_run_id(explicit_run_id: str | None = None) -> str:
+    """Return the explicit run id, else one from GITHUB_RUN_ID/GITHUB_RUN_ATTEMPT, else a UTC timestamp."""
+    if explicit_run_id:
+        return slugify_path_component(explicit_run_id, "run")
+
+    github_run_id = os.environ.get("GITHUB_RUN_ID", "").strip()
+    github_attempt = os.environ.get("GITHUB_RUN_ATTEMPT", "").strip()
+    if github_run_id:
+        attempt_suffix = f"-attempt-{github_attempt}" if github_attempt else ""
+        return f"run-{slugify_path_component(github_run_id, 'run')}{attempt_suffix}"
+
+    return time.strftime("run-%Y%m%d-%H%M%S", time.gmtime())
+
+
+def resolve_workspace_root(workspace_root: str | None) -> Path | None:
+    """Return the workspace root as an absolute, user-expanded path, or None when it is not set."""
+    if not workspace_root:
+        return None
+    return Path(workspace_root).expanduser().resolve()
+
+
+def ensure_path_within_root(root: Path, candidate: Path) -> Path:
+    """Return candidate resolved; raise ValueError if the resolved path is not under root."""
+    resolved = candidate.resolve()
+    try:
+        resolved.relative_to(root)
+    except ValueError as exc:
+        raise ValueError(f"Path escapes sandbox root: {candidate}") from exc
+    return resolved
+
+
+def safe_output_path(output_dir: Path, relative_path: str) -> Path:
+    """Resolve relative_path under output_dir; raise ValueError for blank, absolute, or ".." paths."""
+    rel = Path(relative_path)
+    if rel.is_absolute() or not relative_path.strip():
+        raise ValueError(f"Unsafe generated file path: {relative_path!r}")
+    if any(part in ("..", "") for part in rel.parts):
+        raise ValueError(f"Unsafe generated file path: {relative_path!r}")
+    return ensure_path_within_root(output_dir.resolve(), output_dir / rel)
+
+
+def prepare_workspace(
+    task_type: str,
+    workspace_root: str | None = None,
+    category: str | None = None,
+    genre: str | None = None,
+    project: str | None = None,
+    run_id: str | None = None,
+    source_repo: str | None = None,
+    explicit_output_dir: str | None = None,
+) -> dict:
+    """Return a dict describing the run's output location; creates the sandbox run directory when a root is given."""
+    resolved_workspace_root = resolve_workspace_root(workspace_root)
+    if not resolved_workspace_root:
+        output_dir = Path(explicit_output_dir or "/tmp/hats-task-output").expanduser().resolve()
+        return {
+            "workspace_root": None,
+            "output_dir": output_dir,
+            "category": None,
+            "genre": None,
+            "project": None,
+            "run_id": None,
+        }
+
+    normalized_category = slugify_path_component(
+        category, DEFAULT_CATEGORIES.get(task_type, "misc")
+    )
+    normalized_genre = slugify_path_component(genre, "general")
+    normalized_project = slugify_path_component(
+        project, infer_project_slug(source_repo, task_type)
+    )
+    normalized_run_id = build_run_id(run_id)
+
+    output_dir = ensure_path_within_root(
+        resolved_workspace_root,
+        resolved_workspace_root / normalized_category / normalized_genre / normalized_project / normalized_run_id,
+    )
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    return {
+        "workspace_root": resolved_workspace_root,
+        "output_dir": output_dir,
+        "category": normalized_category,
+        "genre": normalized_genre,
+        "project": normalized_project,
+        "run_id": normalized_run_id,
+    }
+
+
+def build_run_manifest(
+    task_result: dict,
+    prompt: str,
+    requested_hats: list[str] | None,
+    source_repo: str | None,
+    source_pr: str | None,
+    source_issue: str | None,
+    workspace_info: dict,
+) -> dict:
+    """Assemble the manifest dict (task, source, workspace, generated files, stats) for one task run."""
+    output_dir = workspace_info["output_dir"]
+    files = [
+        {
+            "path": entry["path"],
+            "description": entry.get("description", ""),
+            "absolute_path": str((output_dir / entry["path"]).resolve()),
+        }
+        for entry in task_result.get("files", [])
+    ]
+
+    return {
+        "schema_version": 1,
+        "created_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+        "status": "completed",
+        "task_type": task_result["task_type"],
+        "prompt": prompt,
+        "requested_hats": requested_hats or [],
+        "primary_hat": task_result["primary_hat"],
+        "summary": task_result["summary"],
+        "notes": task_result.get("notes", []),
+        "source": {
+            "repo": source_repo or "",
+            "pr": source_pr or "",
+            "issue": source_issue or "",
+        },
+        "workspace": {
+            "root": str(workspace_info["workspace_root"]) if workspace_info["workspace_root"] else "",
+            "category": workspace_info["category"] or "",
+            "genre": workspace_info["genre"] or "",
+            "project": workspace_info["project"] or "",
+            "run_id": workspace_info["run_id"] or "",
+            "output_dir": str(output_dir),
+        },
+        "generated_files": files,
+        "stats": task_result.get("stats", {}),
+    }
+
+
+def write_workspace_indexes(workspace_root: Path):
+    """Write CATEGORY_INDEX.md in each category directory and PLAYGROUND_INDEX.md at the workspace root."""
+    categories = []
+    for category_dir in sorted(p for p in workspace_root.iterdir() if p.is_dir()):
+        category_lines = [f"# {category_dir.name} playground index", ""]
+        category_projects = []
+
+        for genre_dir in sorted(p for p in category_dir.iterdir() if p.is_dir()):
+            for project_dir in sorted(p for p in genre_dir.iterdir() if p.is_dir()):
+                runs = sorted(p for p in project_dir.iterdir() if p.is_dir())
+                if not runs:
+                    continue
+                latest = runs[-1]
+                category_projects.append((genre_dir.name, project_dir.name, latest.name, len(runs)))
+
+        if category_projects:
+            category_lines.append("| Genre | Project | Latest Run | Runs |")
+            category_lines.append("|-------|---------|------------|------|")
+            for genre_name, project_name, latest_run, run_count in category_projects:
+                category_lines.append(
+                    f"| {genre_name} | {project_name} | `{latest_run}` | {run_count} |"
+                )
+            category_lines.append("")
+        else:
+            category_lines.append("_No projects yet._")
+            category_lines.append("")
+
+        (category_dir / "CATEGORY_INDEX.md").write_text(
+            "\n".join(category_lines), encoding="utf-8"
+        )
+        categories.append((category_dir.name, len(category_projects)))
+
+    root_lines = ["# Hats Playground Index", ""]
+    if categories:
+        root_lines.append("| Category | Projects |")
+        root_lines.append("|----------|----------|")
+        for category_name, project_count in categories:
+            root_lines.append(f"| {category_name} | {project_count} |")
+        root_lines.append("")
+        root_lines.append(
+            "Folder layout: `playground/<category>/<genre>/<project>/<run-id>/`"
+        )
+    else:
+        root_lines.append("_No playground runs yet._")
+    root_lines.append("")
+
+    (workspace_root / "PLAYGROUND_INDEX.md").write_text(
+        "\n".join(root_lines), encoding="utf-8"
+    )
+
+
 def call_ollama(config: dict, model: str, system_prompt: str, user_prompt: str,
                 temperature: float = 0.3, max_tokens: int = 8192,
                 timeout: int = 300) -> dict:
@@ -445,13 +659,22 @@ def run_task_pipeline(config: dict, task_type: str, user_prompt: str,
     }
 
 
-def write_output_files(task_result: dict, output_dir: str):
+def write_output_files(
+    task_result: dict,
+    output_dir: str | Path,
+    workspace_info: dict | None = None,
+    prompt: str = "",
+    requested_hats: list[str] | None = None,
+    source_repo: str | None = None,
+    source_pr: str | None = None,
+    source_issue: str | None = None,
+):
     """Write generated files to the output directory."""
-    out = Path(output_dir)
+    out = Path(output_dir).resolve()
     out.mkdir(parents=True, exist_ok=True)
 
     for file_entry in task_result.get("files", []):
-        filepath = out / file_entry["path"]
+        filepath = safe_output_path(out, file_entry["path"])
         filepath.parent.mkdir(parents=True, exist_ok=True)
         filepath.write_text(file_entry["content"], encoding="utf-8")
         print(f"  📄 {filepath}", file=sys.stderr)
@@ -487,6 +710,21 @@ def write_output_files(task_result: dict, output_dir: str):
     json_path.write_text(json.dumps(task_result, indent=2), encoding="utf-8")
     print(f"  📊 {json_path}", file=sys.stderr)
 
+    if workspace_info and workspace_info.get("workspace_root"):
+        manifest = build_run_manifest(
+            task_result,
+            prompt=prompt,
+            requested_hats=requested_hats,
+            source_repo=source_repo,
+            source_pr=source_pr,
+            source_issue=source_issue,
+            workspace_info=workspace_info,
+        )
+        manifest_path = out / "PLAYGROUND_MANIFEST.json"
+        manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
+        print(f"  🗂️ {manifest_path}", file=sys.stderr)
+        write_workspace_indexes(workspace_info["workspace_root"])
+
 
 # ---------------------------------------------------------------------------
 # CLI entry point
@@ -525,9 +763,61 @@ def main():
         "--json-file", default=None,
         help="Path to write JSON result (in addition to output dir)"
     )
+    parser.add_argument(
+        "--workspace-root", default=None,
+        help="Optional sandbox root for structured playground storage"
+    )
+    parser.add_argument(
+        "--category", default=None,
+        help="Optional playground category (default: inferred from task type)"
+    )
+    parser.add_argument(
+        "--genre", default=None,
+        help="Optional playground genre/type bucket"
+    )
+    parser.add_argument(
+        "--project", default=None,
+        help="Optional playground project slug"
+    )
+    parser.add_argument(
+        "--run-id", default=None,
+        help="Optional run id folder name inside the playground project"
+    )
+    parser.add_argument(
+        "--source-repo", default=None,
+        help="Source repo for manifest metadata"
+    )
+    parser.add_argument(
+        "--source-pr", default=None,
+        help="Source PR number for manifest metadata"
+    )
+    parser.add_argument(
+        "--source-issue", default=None,
+        help="Source issue number for manifest metadata"
+    )
 
     args = parser.parse_args()
 
+    workspace_info = prepare_workspace(
+        task_type=args.task,
+        workspace_root=args.workspace_root,
+        category=args.category,
+        genre=args.genre,
+        project=args.project,
+        run_id=args.run_id,
+        source_repo=args.source_repo,
+        explicit_output_dir=args.output,
+    )
+    output_dir = workspace_info["output_dir"]
+
+    github_output = os.environ.get("GITHUB_OUTPUT")
+    if github_output:
+        with open(github_output, "a", encoding="utf-8") as fh:
+            fh.write(f"output_dir={output_dir}\n")
+            fh.write(
+                f"workspace_root={workspace_info['workspace_root'] or ''}\n"
+            )
+
     # Preflight
     api_key = os.environ.get("OLLAMA_API_KEY", "").strip()
     if not api_key:
@@ -554,7 +844,7 @@ def main():
 
     requested_hats = None
     if args.hats:
-        requested_hats = [h.strip() for h in args.hats.split(",")]
+        requested_hats = [h.strip() for h in args.hats.split(",") if h.strip()]
 
     # Run the task
     result = run_task_pipeline(
@@ -564,21 +854,28 @@ def main():
     )
 
     # Write outputs
-    print(f"\n📦 Writing output to {args.output}/", file=sys.stderr)
-    write_output_files(result, args.output)
+    print(f"\n📦 Writing output to {output_dir}/", file=sys.stderr)
+    write_output_files(
+        result,
+        output_dir,
+        workspace_info=workspace_info,
+        prompt=args.prompt,
+        requested_hats=requested_hats,
+        source_repo=args.source_repo,
+        source_pr=args.source_pr,
+        source_issue=args.source_issue,
+    )
 
     if args.json_file:
         with open(args.json_file, "w", encoding="utf-8") as fh:
             json.dump(result, fh, indent=2)
 
     # GitHub Actions outputs
-    github_output = os.environ.get("GITHUB_OUTPUT")
     if github_output:
         with open(github_output, "a", encoding="utf-8") as fh:
             fh.write(f"task_type={result['task_type']}\n")
             fh.write(f"files_generated={len(result['files'])}\n")
             fh.write(f"hats_executed={result['stats']['hats_executed']}\n")
-            fh.write(f"output_dir={args.output}\n")
 
     print(f"\n✅ Task complete: {len(result['files'])} files generated, "
           f"{result['stats']['hats_executed']} hats used", file=sys.stderr)

From 3ee1595a48b5cae3596b09c3fee5c478b8dc6e54 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Apr 2026 00:34:43 +0000
Subject: [PATCH 02/10] Add task playground sandbox and model fallbacks

Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464

Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com>
---
 FORK_SETUP.md               |  2 ++
 README.md                   |  1 +
 scripts/hats_task_runner.py | 72 ++++++++++++++++++++++++++++++-------
 3 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/FORK_SETUP.md b/FORK_SETUP.md
index d703011..0c0abc0 100644
--- a/FORK_SETUP.md
+++ b/FORK_SETUP.md
@@ -229,6 +229,8 @@ hat status
 
 If you do not pass `--category`, `--genre`, or `--project`, Hat Stack infers sensible defaults and creates the folders automatically.
 
+If the first Ollama model fails during task mode, Hat Stack automatically retries the hat's fallback model, then the other configured models (same-tier models first), before giving up.
+
 **For Copilot in VS Code:** Your Copilot agent can shell out to `hat task ...` commands. The `gh` CLI handles auth, and hat_stack handles execution. Your Copilot agent gives the instruction, hat_stack's model pool does the heavy lifting, results come back to the PR.
 
 ---
diff --git a/README.md b/README.md
index a092f3c..83efc45 100644
--- a/README.md
+++ b/README.md
@@ -215,6 +215,7 @@ Task runs now support a structured playground sandbox on the runner:
 - Layout: `<workspace>/<category>/<genre>/<project>/<run-id>/`
 - Contents: generated files, `HATS_TASK_SUMMARY.md`, `hats_task_result.json`, `PLAYGROUND_MANIFEST.json`
 - Persistence: both the run output and the full playground tree are uploaded as workflow artifacts
+- Resilience: if the first Ollama model fails, task mode automatically retries the hat's fallback model, then the other configured models (same-tier models first)
 
 Or dispatch directly via `gh` CLI (what your Copilot agent would call):
 
diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py
index 41b88c3..c7e5a95 100644
--- a/scripts/hats_task_runner.py
+++ b/scripts/hats_task_runner.py
@@ -428,6 +428,42 @@ def select_model_for_task(config: dict, hat_id: str, task_type: str) -> str:
     return hat_def.get("primary_model", "nemotron-3-super")
 
 
+def build_comparable_model_sequence(
+    config: dict,
+    primary_model: str,
+    fallback_model: str | None = None,
+) -> list[str]:
+    """Return the ordered, de-duplicated list of models to try for one hat.
+
+    Order: primary, fallback, primary-tier peers, fallback-tier peers, then every
+    other configured model. The primary and fallback are kept even when absent
+    from config["models"], so a sparse config still yields at least one attempt.
+    """
+    models_cfg = config.get("models") or {}
+    ordered: dict[str, None] = {}  # dicts keep insertion order: an ordered set
+
+    def add(model_name: str | None, explicit: bool = False):
+        if model_name and (explicit or model_name in models_cfg):
+            ordered.setdefault(model_name, None)
+
+    add(primary_model, explicit=True)
+    add(fallback_model, explicit=True)
+
+    primary_tier = models_cfg.get(primary_model, {}).get("tier")
+    fallback_tier = models_cfg.get(fallback_model, {}).get("tier") if fallback_model else None
+
+    for model_name, model_meta in models_cfg.items():
+        if model_meta.get("tier") == primary_tier:
+            add(model_name)
+    if fallback_tier and fallback_tier != primary_tier:
+        for model_name, model_meta in models_cfg.items():
+            if model_meta.get("tier") == fallback_tier:
+                add(model_name)
+    for model_name in models_cfg:
+        add(model_name)
+    return list(ordered)
+
+
 def build_task_prompt(config: dict, hat_id: str, task_type: str,
                       user_prompt: str, context_files: dict | None = None) -> tuple[str, str]:
     """Build the system and user prompts for a task execution.
@@ -466,29 +502,34 @@ def run_task_hat(config: dict, hat_id: str, task_type: str,
     )
 
     start = time.time()
-    result = call_ollama(
-        config, model, system_prompt, full_user_prompt,
-        temperature=hat_def.get("temperature", 0.3),
-        max_tokens=8192,  # Task mode needs more output room
-        timeout=hat_def.get("timeout_seconds", 300),
-    )
-    elapsed = time.time() - start
-
-    # Try fallback if primary fails
-    if result["error"] and hat_def.get("fallback_model"):
+    attempted_models = []
+    result = {
+        "error": "No model attempts executed",
+        "model": model,
+        "content": None,
+        "usage": {"input": 0, "output": 0},
+    }
+    for candidate_model in build_comparable_model_sequence(
+        config, model, hat_def.get("fallback_model")
+    ):
+        attempted_models.append(candidate_model)
         result = call_ollama(
-            config, hat_def["fallback_model"], system_prompt, full_user_prompt,
+            config, candidate_model, system_prompt, full_user_prompt,
             temperature=hat_def.get("temperature", 0.3),
-            max_tokens=8192,
+            max_tokens=8192,  # Task mode needs more output room
             timeout=hat_def.get("timeout_seconds", 300),
         )
-        elapsed = time.time() - start
+        if not result["error"]:
+            break
+
+    elapsed = time.time() - start
 
     report = {
         "hat_id": hat_id,
         "hat_name": hat_def.get("name", hat_id),
         "emoji": hat_def.get("emoji", "🎩"),
         "model_used": result["model"],
+        "attempted_models": attempted_models,
         "latency_seconds": round(elapsed, 2),
         "token_usage": result["usage"],
         "error": result["error"],
@@ -512,6 +553,11 @@ def run_task_hat(config: dict, hat_id: str, task_type: str,
             }]
             report["summary"] = "Model returned unstructured output"
 
+    if len(attempted_models) > 1 and report["model_used"] != attempted_models[0]:
+        report["notes"].append(
+            f"Primary model fallback used: {attempted_models[0]} → {report['model_used']}"
+        )
+
     return report
 
 

From c483be25fd7ef2b12e2bf107d372cce55cb7b4d9 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Apr 2026 00:36:20 +0000
Subject: [PATCH 03/10] Refine task sandbox validation fixes

Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464

Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com>
---
 .github/workflows/hats-task.yml |  4 ++--
 scripts/hats_task_runner.py     | 12 +++++++++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml
index 8029ac3..3e1e8af 100644
--- a/.github/workflows/hats-task.yml
+++ b/.github/workflows/hats-task.yml
@@ -155,8 +155,8 @@ jobs:
               f.write(f\"project={sanitize(payload.get('project', ''))}\n\")
               f.write(f\"workspace_root={sanitize(payload.get('workspace_root', '/tmp/hats-playground'))}\n\")
               f.write(f\"context={sanitize(payload.get('context', ''))}\n\")
-           "
-          fi
+          "
+           fi
 
       - name: Fetch context from callback repo
         id: context
diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py
index c7e5a95..24df3dc 100644
--- a/scripts/hats_task_runner.py
+++ b/scripts/hats_task_runner.py
@@ -34,6 +34,7 @@
 import os
 import re
 import sys
+import tempfile
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
@@ -198,7 +199,7 @@ def safe_output_path(output_dir: Path, relative_path: str) -> Path:
     rel = Path(relative_path)
     if rel.is_absolute() or not relative_path.strip():
         raise ValueError(f"Unsafe generated file path: {relative_path!r}")
-    if any(part in ("..", "") for part in rel.parts):
+    if any(part == ".." for part in rel.parts):
         raise ValueError(f"Unsafe generated file path: {relative_path!r}")
     return ensure_path_within_root(output_dir.resolve(), output_dir / rel)
 
@@ -216,7 +217,12 @@ def prepare_workspace(
     """Prepare sandbox workspace metadata and return resolved output path."""
     resolved_workspace_root = resolve_workspace_root(workspace_root)
     if not resolved_workspace_root:
-        output_dir = Path(explicit_output_dir or "/tmp/hats-task-output").expanduser().resolve()
+        default_output_dir = (
+            Path(tempfile.gettempdir())
+            / f"hats-task-output-{build_run_id()}-{os.getpid()}"
+        )
+        output_dir = Path(explicit_output_dir or default_output_dir).expanduser().resolve()
+        output_dir.mkdir(parents=True, exist_ok=True)
         return {
             "workspace_root": None,
             "output_dir": output_dir,
@@ -504,7 +510,7 @@ def run_task_hat(config: dict, hat_id: str, task_type: str,
     start = time.time()
     attempted_models = []
     result = {
-        "error": "No model attempts executed",
+        "error": "All model attempts failed",
         "model": model,
         "content": None,
         "usage": {"input": 0, "output": 0},

From 494a2fdee4123060f466f7b62613d00245e5de57 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Apr 2026 00:37:21 +0000
Subject: [PATCH 04/10] Clarify fallback note logic

Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464

Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com>
---
 scripts/hats_task_runner.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py
index 24df3dc..e2b5f0b 100644
--- a/scripts/hats_task_runner.py
+++ b/scripts/hats_task_runner.py
@@ -559,7 +559,8 @@ def run_task_hat(config: dict, hat_id: str, task_type: str,
             }]
             report["summary"] = "Model returned unstructured output"
 
-    if len(attempted_models) > 1 and report["model_used"] != attempted_models[0]:
+    used_fallback_model = len(attempted_models) > 1 and attempted_models[0] != report["model_used"]
+    if used_fallback_model:
         report["notes"].append(
             f"Primary model fallback used: {attempted_models[0]} → {report['model_used']}"
         )

From 11e3f69fdbaef99bbbf176bac75ee6fb24710909 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Apr 2026 00:38:22 +0000
Subject: [PATCH 05/10] Address final review feedback

Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464

Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com>
---
 .github/workflows/hats-task.yml | 2 +-
 scripts/hats_task_runner.py     | 9 ++++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml
index 3e1e8af..8d8a838 100644
--- a/.github/workflows/hats-task.yml
+++ b/.github/workflows/hats-task.yml
@@ -156,7 +156,7 @@ jobs:
               f.write(f\"workspace_root={sanitize(payload.get('workspace_root', '/tmp/hats-playground'))}\n\")
               f.write(f\"context={sanitize(payload.get('context', ''))}\n\")
           "
-           fi
+          fi
 
       - name: Fetch context from callback repo
         id: context
diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py
index e2b5f0b..2372330 100644
--- a/scripts/hats_task_runner.py
+++ b/scripts/hats_task_runner.py
@@ -196,8 +196,10 @@ def ensure_path_within_root(root: Path, candidate: Path) -> Path:
 
 def safe_output_path(output_dir: Path, relative_path: str) -> Path:
     """Return a safe output path within the output directory."""
+    if not relative_path.strip():
+        raise ValueError(f"Unsafe generated file path: {relative_path!r}")
     rel = Path(relative_path)
-    if rel.is_absolute() or not relative_path.strip():
+    if rel.is_absolute():
         raise ValueError(f"Unsafe generated file path: {relative_path!r}")
     if any(part == ".." for part in rel.parts):
         raise ValueError(f"Unsafe generated file path: {relative_path!r}")
@@ -509,6 +511,7 @@ def run_task_hat(config: dict, hat_id: str, task_type: str,
 
     start = time.time()
     attempted_models = []
+    fallback_used = False
     result = {
         "error": "All model attempts failed",
         "model": model,
@@ -526,6 +529,7 @@ def run_task_hat(config: dict, hat_id: str, task_type: str,
             timeout=hat_def.get("timeout_seconds", 300),
         )
         if not result["error"]:
+            fallback_used = candidate_model != attempted_models[0]
             break
 
     elapsed = time.time() - start
@@ -559,8 +563,7 @@ def run_task_hat(config: dict, hat_id: str, task_type: str,
             }]
             report["summary"] = "Model returned unstructured output"
 
-    used_fallback_model = len(attempted_models) > 1 and attempted_models[0] != report["model_used"]
-    if used_fallback_model:
+    if fallback_used:
         report["notes"].append(
             f"Primary model fallback used: {attempted_models[0]} → {report['model_used']}"
         )

From 32cf043e5a0739405105954a6c33c684543fce3d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Apr 2026 00:39:30 +0000
Subject: [PATCH 06/10] Tighten fallback and docs details

Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464

Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com>
---
 README.md                   | 4 +++-
 scripts/hat                 | 2 +-
 scripts/hats_task_runner.py | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 83efc45..c396278 100644
--- a/README.md
+++ b/README.md
@@ -211,12 +211,14 @@ git diff main | hat review - --repo myorg/app --pr 123
 
 Task runs now support a structured playground sandbox on the runner:
 
-- Default workspace root: `/tmp/hats-playground`
+- Workflow default workspace root: `/tmp/hats-playground`
 - Layout: `<workspace>/<category>/<genre>/<project>/<run-id>/`
 - Contents: generated files, `HATS_TASK_SUMMARY.md`, `hats_task_result.json`, `PLAYGROUND_MANIFEST.json`
 - Persistence: both the run output and the full playground tree are uploaded as workflow artifacts
 - Resilience: if the first Ollama model fails, task mode retries comparable configured fallback models automatically
 
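+If no workspace root is provided (for example, when running outside the workflow) and no output directory is given explicitly, task mode falls back to a unique per-run output directory under the system temporary directory (typically `/tmp`).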
+
 Or dispatch directly via `gh` CLI (what your Copilot agent would call):
 
 ```bash
diff --git a/scripts/hat b/scripts/hat
index ce45814..1dc35d5 100755
--- a/scripts/hat
+++ b/scripts/hat
@@ -213,7 +213,7 @@ payload = {
     'callback_repo': os.environ.get('HAT_CB_REPO', ''),
     'callback_pr': os.environ.get('HAT_CB_PR', ''),
     'callback_issue': os.environ.get('HAT_CB_ISSUE', ''),
-    'hats': os.environ.get('HAT_HATS', ''),
+    'hats': ','.join([h.strip() for h in os.environ.get('HAT_HATS', '').split(',') if h.strip()]),
     'context': os.environ.get('HAT_CTX', ''),
     'category': os.environ.get('HAT_CATEGORY', ''),
     'genre': os.environ.get('HAT_GENRE', ''),
diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py
index 2372330..3525e50 100644
--- a/scripts/hats_task_runner.py
+++ b/scripts/hats_task_runner.py
@@ -171,7 +171,7 @@ def build_run_id(explicit_run_id: str | None = None) -> str:
     github_run_id = os.environ.get("GITHUB_RUN_ID", "").strip()
     github_attempt = os.environ.get("GITHUB_RUN_ATTEMPT", "").strip()
     if github_run_id:
-        attempt_suffix = f"-attempt-{github_attempt}" if github_attempt else ""
+        attempt_suffix = f"-attempt-{github_attempt}" if github_attempt != "" else ""
         return f"run-{slugify_path_component(github_run_id, 'run')}{attempt_suffix}"
 
     return time.strftime("run-%Y%m%d-%H%M%S", time.gmtime())
@@ -529,7 +529,7 @@ def run_task_hat(config: dict, hat_id: str, task_type: str,
             timeout=hat_def.get("timeout_seconds", 300),
         )
         if not result["error"]:
-            fallback_used = candidate_model != attempted_models[0]
+            fallback_used = candidate_model != model
             break
 
     elapsed = time.time() - start

From 1c30f4506ea10788d0e516708581553bd21f1277 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Apr 2026 00:40:24 +0000
Subject: [PATCH 07/10] Clean up fallback validation details

Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464

Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com>
---
 scripts/hat                 | 2 +-
 scripts/hats_task_runner.py | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/scripts/hat b/scripts/hat
index 1dc35d5..ce45814 100755
--- a/scripts/hat
+++ b/scripts/hat
@@ -213,7 +213,7 @@ payload = {
     'callback_repo': os.environ.get('HAT_CB_REPO', ''),
     'callback_pr': os.environ.get('HAT_CB_PR', ''),
     'callback_issue': os.environ.get('HAT_CB_ISSUE', ''),
-    'hats': ','.join([h.strip() for h in os.environ.get('HAT_HATS', '').split(',') if h.strip()]),
+    'hats': os.environ.get('HAT_HATS', ''),
     'context': os.environ.get('HAT_CTX', ''),
     'category': os.environ.get('HAT_CATEGORY', ''),
     'genre': os.environ.get('HAT_GENRE', ''),
diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py
index 3525e50..c19b959 100644
--- a/scripts/hats_task_runner.py
+++ b/scripts/hats_task_runner.py
@@ -171,7 +171,7 @@ def build_run_id(explicit_run_id: str | None = None) -> str:
     github_run_id = os.environ.get("GITHUB_RUN_ID", "").strip()
     github_attempt = os.environ.get("GITHUB_RUN_ATTEMPT", "").strip()
     if github_run_id:
-        attempt_suffix = f"-attempt-{github_attempt}" if github_attempt != "" else ""
+        attempt_suffix = f"-attempt-{github_attempt}" if github_attempt else ""
         return f"run-{slugify_path_component(github_run_id, 'run')}{attempt_suffix}"
 
     return time.strftime("run-%Y%m%d-%H%M%S", time.gmtime())
@@ -197,7 +197,7 @@ def ensure_path_within_root(root: Path, candidate: Path) -> Path:
 def safe_output_path(output_dir: Path, relative_path: str) -> Path:
     """Return a safe output path within the output directory."""
     if not relative_path.strip():
-        raise ValueError(f"Unsafe generated file path: {relative_path!r}")
+        raise ValueError("Empty file path is not allowed")
     rel = Path(relative_path)
     if rel.is_absolute():
         raise ValueError(f"Unsafe generated file path: {relative_path!r}")
@@ -511,7 +511,6 @@ def run_task_hat(config: dict, hat_id: str, task_type: str,
 
     start = time.time()
     attempted_models = []
-    fallback_used = False
     result = {
         "error": "All model attempts failed",
         "model": model,
@@ -529,10 +528,10 @@ def run_task_hat(config: dict, hat_id: str, task_type: str,
             timeout=hat_def.get("timeout_seconds", 300),
         )
         if not result["error"]:
-            fallback_used = candidate_model != model
             break
 
     elapsed = time.time() - start
+    fallback_used = not result["error"] and result["model"] != model
 
     report = {
         "hat_id": hat_id,

From 646f11e50421354ffffb812e2523de48258b798e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Apr 2026 01:00:41 +0000
Subject: [PATCH 08/10] Fix review-thread task workspace issues

Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/4d97ddbd-e0d9-4fe7-b2b0-42f70dc12d65

Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com>
---
 .github/workflows/hats-task.yml | 35 +++++++++++++++++++++------------
 scripts/hats_task_runner.py     | 21 ++++++++++++++++----
 2 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml
index 8d8a838..d0dc176 100644
--- a/.github/workflows/hats-task.yml
+++ b/.github/workflows/hats-task.yml
@@ -110,21 +110,30 @@ jobs:
           if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
             # workflow_dispatch inputs are controlled by the UI schema — safe
             echo "task=${{ inputs.task }}" >> "$GITHUB_OUTPUT"
-            echo "hats=${{ inputs.hats }}" >> "$GITHUB_OUTPUT"
-            echo "callback_repo=${{ inputs.callback_repo }}" >> "$GITHUB_OUTPUT"
-            echo "callback_pr=${{ inputs.callback_pr }}" >> "$GITHUB_OUTPUT"
-            echo "callback_issue=${{ inputs.callback_issue }}" >> "$GITHUB_OUTPUT"
-            echo "category=${{ inputs.category }}" >> "$GITHUB_OUTPUT"
-            echo "genre=${{ inputs.genre }}" >> "$GITHUB_OUTPUT"
-            echo "project=${{ inputs.project }}" >> "$GITHUB_OUTPUT"
-            echo "workspace_root=${{ inputs.workspace_root }}" >> "$GITHUB_OUTPUT"
-            # Prompt may contain special chars — pass via env to avoid code injection
-            INPUT_PROMPT="${{ inputs.prompt }}" python3 -c "
+            # String inputs may contain newlines — sanitize before writing outputs
+            INPUT_PROMPT="${{ inputs.prompt }}" \
+            INPUT_HATS="${{ inputs.hats }}" \
+            INPUT_CALLBACK_REPO="${{ inputs.callback_repo }}" \
+            INPUT_CALLBACK_PR="${{ inputs.callback_pr }}" \
+            INPUT_CALLBACK_ISSUE="${{ inputs.callback_issue }}" \
+            INPUT_CATEGORY="${{ inputs.category }}" \
+            INPUT_GENRE="${{ inputs.genre }}" \
+            INPUT_PROJECT="${{ inputs.project }}" \
+            INPUT_WORKSPACE_ROOT="${{ inputs.workspace_root }}" \
+            python3 -c "
           import os, re
-          val = os.environ.get('INPUT_PROMPT', '')
-          sanitized = re.sub(r'[\r\n]', ' ', val).strip()
+          def sanitize(value):
+              return re.sub(r'[\r\n]', ' ', value).strip()
           with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
-              f.write(f'prompt={sanitized}\n')
+              f.write(f\"prompt={sanitize(os.environ.get('INPUT_PROMPT', ''))}\n\")
+              f.write(f\"hats={sanitize(os.environ.get('INPUT_HATS', ''))}\n\")
+              f.write(f\"callback_repo={sanitize(os.environ.get('INPUT_CALLBACK_REPO', ''))}\n\")
+              f.write(f\"callback_pr={sanitize(os.environ.get('INPUT_CALLBACK_PR', ''))}\n\")
+              f.write(f\"callback_issue={sanitize(os.environ.get('INPUT_CALLBACK_ISSUE', ''))}\n\")
+              f.write(f\"category={sanitize(os.environ.get('INPUT_CATEGORY', ''))}\n\")
+              f.write(f\"genre={sanitize(os.environ.get('INPUT_GENRE', ''))}\n\")
+              f.write(f\"project={sanitize(os.environ.get('INPUT_PROJECT', ''))}\n\")
+              f.write(f\"workspace_root={sanitize(os.environ.get('INPUT_WORKSPACE_ROOT', ''))}\n\")
           "
           else
             # repository_dispatch — extract from client_payload with sanitization
diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py
index c19b959..c3d9189 100644
--- a/scripts/hats_task_runner.py
+++ b/scripts/hats_task_runner.py
@@ -164,7 +164,7 @@ def infer_project_slug(source_repo: str | None, task_type: str) -> str:
 
 
 def build_run_id(explicit_run_id: str | None = None) -> str:
-    """Build a deterministic run id for workspace storage."""
+    """Build a run id for workspace storage, preferring GitHub run metadata when available."""
     if explicit_run_id:
         return slugify_path_component(explicit_run_id, "run")
 
@@ -282,7 +282,7 @@ def build_run_manifest(
     return {
         "schema_version": 1,
         "created_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
-        "status": "completed",
+        "status": task_result.get("status", "completed"),
         "task_type": task_result["task_type"],
         "prompt": prompt,
         "requested_hats": requested_hats or [],
@@ -316,7 +316,10 @@ def write_workspace_indexes(workspace_root: Path):
 
         for genre_dir in sorted(p for p in category_dir.iterdir() if p.is_dir()):
             for project_dir in sorted(p for p in genre_dir.iterdir() if p.is_dir()):
-                runs = sorted(p for p in project_dir.iterdir() if p.is_dir())
+                runs = sorted(
+                    (p for p in project_dir.iterdir() if p.is_dir()),
+                    key=lambda path: (path.stat().st_mtime, path.name),
+                )
                 if not runs:
                     continue
                 latest = runs[-1]
@@ -347,7 +350,7 @@ def write_workspace_indexes(workspace_root: Path):
             root_lines.append(f"| {category_name} | {project_count} |")
         root_lines.append("")
         root_lines.append(
-            "Folder layout: `playground/<category>/<genre>/<project>/<run-id>/`"
+            f"Folder layout: `{workspace_root}/<category>/<genre>/<project>/<run-id>/`"
         )
     else:
         root_lines.append("_No playground runs yet._")
@@ -692,8 +695,18 @@ def run_task_pipeline(config: dict, task_type: str, user_prompt: str,
         total_tokens["input"] += r["token_usage"]["input"]
         total_tokens["output"] += r["token_usage"]["output"]
 
+    primary_failed = bool(primary_result["error"]) and not primary_result["files"]
+    had_any_errors = any(result["error"] for result in all_results)
+    if primary_failed:
+        status = "failed"
+    elif had_any_errors:
+        status = "completed_with_warnings"
+    else:
+        status = "completed"
+
     return {
         "task_type": task_type,
+        "status": status,
         "primary_hat": primary_hat,
         "files": primary_result["files"],
         "summary": primary_result["summary"],

From acfe16ce9e9bb746e039ef3d3b51da89f2d95712 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Apr 2026 01:01:53 +0000
Subject: [PATCH 09/10] Polish review-thread follow-up fixes

Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/4d97ddbd-e0d9-4fe7-b2b0-42f70dc12d65

Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com>
---
 .github/workflows/hats-task.yml | 2 ++
 scripts/hats_task_runner.py     | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml
index d0dc176..0178378 100644
--- a/.github/workflows/hats-task.yml
+++ b/.github/workflows/hats-task.yml
@@ -123,6 +123,8 @@ jobs:
             python3 -c "
           import os, re
           def sanitize(value):
+              if not value:
+                  return ''
               return re.sub(r'[\r\n]', ' ', value).strip()
           with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
               f.write(f\"prompt={sanitize(os.environ.get('INPUT_PROMPT', ''))}\n\")
diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py
index c3d9189..bb8e767 100644
--- a/scripts/hats_task_runner.py
+++ b/scripts/hats_task_runner.py
@@ -695,6 +695,8 @@ def run_task_pipeline(config: dict, task_type: str, user_prompt: str,
         total_tokens["input"] += r["token_usage"]["input"]
         total_tokens["output"] += r["token_usage"]["output"]
 
+    # Treat the task as failed only when the primary generation failed outright;
+    # supporting/gold hat errors should surface as warnings if deliverables still exist.
     primary_failed = bool(primary_result["error"]) and not primary_result["files"]
     had_any_errors = any(result["error"] for result in all_results)
     if primary_failed:

From da504f64c27bc334b9d6403557f17e4e5149cfc0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Apr 2026 01:02:58 +0000
Subject: [PATCH 10/10] Finalize review-thread cleanup

Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/4d97ddbd-e0d9-4fe7-b2b0-42f70dc12d65

Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com>
---
 .github/workflows/hats-task.yml | 5 ++---
 scripts/hats_task_runner.py     | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml
index 0178378..18b3773 100644
--- a/.github/workflows/hats-task.yml
+++ b/.github/workflows/hats-task.yml
@@ -123,9 +123,8 @@ jobs:
             python3 -c "
           import os, re
           def sanitize(value):
-              if not value:
-                  return ''
-              return re.sub(r'[\r\n]', ' ', value).strip()
+              text = str(value or '')
+              return re.sub(r'[\r\n]', ' ', text).strip()
           with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
               f.write(f\"prompt={sanitize(os.environ.get('INPUT_PROMPT', ''))}\n\")
               f.write(f\"hats={sanitize(os.environ.get('INPUT_HATS', ''))}\n\")
diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py
index bb8e767..f1013ae 100644
--- a/scripts/hats_task_runner.py
+++ b/scripts/hats_task_runner.py
@@ -697,7 +697,7 @@ def run_task_pipeline(config: dict, task_type: str, user_prompt: str,
 
     # Treat the task as failed only when the primary generation failed outright;
     # supporting/gold hat errors should surface as warnings if deliverables still exist.
-    primary_failed = bool(primary_result["error"]) and not primary_result["files"]
+    primary_failed = primary_result["error"] and not primary_result["files"]
     had_any_errors = any(result["error"] for result in all_results)
     if primary_failed:
         status = "failed"