From bdb72147632bcae2cef3c194e6fa9b6537e933ec Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 00:33:29 +0000 Subject: [PATCH 01/10] Add playground task workspace scaffolding Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464 Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com> --- .github/workflows/hats-task.yml | 62 ++++++- FORK_SETUP.md | 23 ++- README.md | 11 +- scripts/hat | 20 +- scripts/hats_task_runner.py | 313 +++++++++++++++++++++++++++++++- 5 files changed, 407 insertions(+), 22 deletions(-) diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml index 4e781b6..8029ac3 100644 --- a/.github/workflows/hats-task.yml +++ b/.github/workflows/hats-task.yml @@ -61,6 +61,26 @@ on: required: false type: string default: "" + category: + description: "Playground category (default: inferred from task type)" + required: false + type: string + default: "" + genre: + description: "Playground genre/type bucket" + required: false + type: string + default: "" + project: + description: "Playground project slug" + required: false + type: string + default: "" + workspace_root: + description: "Sandbox workspace root on the runner" + required: false + type: string + default: "/tmp/hats-playground" permissions: contents: read @@ -94,6 +114,10 @@ jobs: echo "callback_repo=${{ inputs.callback_repo }}" >> "$GITHUB_OUTPUT" echo "callback_pr=${{ inputs.callback_pr }}" >> "$GITHUB_OUTPUT" echo "callback_issue=${{ inputs.callback_issue }}" >> "$GITHUB_OUTPUT" + echo "category=${{ inputs.category }}" >> "$GITHUB_OUTPUT" + echo "genre=${{ inputs.genre }}" >> "$GITHUB_OUTPUT" + echo "project=${{ inputs.project }}" >> "$GITHUB_OUTPUT" + echo "workspace_root=${{ inputs.workspace_root }}" >> "$GITHUB_OUTPUT" # Prompt may contain special chars — pass via env to avoid code injection INPUT_PROMPT="${{ 
inputs.prompt }}" python3 -c " import os, re @@ -126,8 +150,12 @@ jobs: f.write(f\"callback_repo={sanitize(payload.get('callback_repo', ''))}\n\") f.write(f\"callback_pr={sanitize(payload.get('callback_pr', ''))}\n\") f.write(f\"callback_issue={sanitize(payload.get('callback_issue', ''))}\n\") + f.write(f\"category={sanitize(payload.get('category', ''))}\n\") + f.write(f\"genre={sanitize(payload.get('genre', ''))}\n\") + f.write(f\"project={sanitize(payload.get('project', ''))}\n\") + f.write(f\"workspace_root={sanitize(payload.get('workspace_root', '/tmp/hats-playground'))}\n\") f.write(f\"context={sanitize(payload.get('context', ''))}\n\") - " + " fi - name: Fetch context from callback repo @@ -175,14 +203,29 @@ jobs: --task "${{ steps.params.outputs.task }}" --prompt "${{ steps.params.outputs.prompt }}" --config scripts/hat_configs.yml - --output /tmp/hats-task-output --json-file /tmp/hats-task-result.json + --workspace-root "${{ steps.params.outputs.workspace_root }}" + --source-repo "${{ steps.params.outputs.callback_repo }}" + --source-pr "${{ steps.params.outputs.callback_pr }}" + --source-issue "${{ steps.params.outputs.callback_issue }}" ) if [ -n "${{ steps.params.outputs.hats }}" ]; then ARGS+=(--hats "${{ steps.params.outputs.hats }}") fi + if [ -n "${{ steps.params.outputs.category }}" ]; then + ARGS+=(--category "${{ steps.params.outputs.category }}") + fi + + if [ -n "${{ steps.params.outputs.genre }}" ]; then + ARGS+=(--genre "${{ steps.params.outputs.genre }}") + fi + + if [ -n "${{ steps.params.outputs.project }}" ]; then + ARGS+=(--project "${{ steps.params.outputs.project }}") + fi + if [ -d "/tmp/hats-context" ] && [ "$(ls -A /tmp/hats-context 2>/dev/null)" ]; then ARGS+=(--context-dir /tmp/hats-context) fi @@ -211,6 +254,7 @@ jobs: CALLBACK_PR="${{ steps.params.outputs.callback_pr }}" CALLBACK_ISSUE="${{ steps.params.outputs.callback_issue }}" TARGET="${CALLBACK_PR:-$CALLBACK_ISSUE}" + OUTPUT_DIR="${{ steps.run.outputs.output_dir }}" if [ -z 
"$GH_TOKEN" ] || [ -z "$TARGET" ]; then echo "āš ļø Cannot post results — missing token or target" @@ -218,8 +262,8 @@ jobs: fi # Build comment from summary - if [ -f /tmp/hats-task-output/HATS_TASK_SUMMARY.md ]; then - REPORT_BODY=$(cat /tmp/hats-task-output/HATS_TASK_SUMMARY.md) + if [ -n "$OUTPUT_DIR" ] && [ -f "$OUTPUT_DIR/HATS_TASK_SUMMARY.md" ]; then + REPORT_BODY=$(cat "$OUTPUT_DIR/HATS_TASK_SUMMARY.md") else REPORT_BODY="šŸŽ© Hats Task completed. Files generated: ${{ steps.run.outputs.files_generated }}" fi @@ -241,7 +285,15 @@ jobs: uses: actions/upload-artifact@v4 with: name: hats-task-output - path: /tmp/hats-task-output/ + path: ${{ steps.run.outputs.output_dir }}/ + retention-days: 30 + + - name: Upload playground workspace + if: always() && steps.run.outputs.workspace_root != '' + uses: actions/upload-artifact@v4 + with: + name: hats-playground + path: ${{ steps.run.outputs.workspace_root }}/ retention-days: 30 - name: Upload JSON result diff --git a/FORK_SETUP.md b/FORK_SETUP.md index 262a4d8..d703011 100644 --- a/FORK_SETUP.md +++ b/FORK_SETUP.md @@ -179,7 +179,7 @@ export HAT_STACK_REPO="YOUR_USERNAME/hat_stack" ```bash # Generate a new code module — results posted as a PR comment hat task generate_code "Build a FastAPI auth module with JWT and refresh tokens" \ - --repo myorg/myapp --pr 42 + --repo myorg/myapp --pr 42 --category code --genre api --project auth-service # Write documentation for an endpoint hat task generate_docs "Write API documentation for the /users endpoints" \ @@ -187,7 +187,7 @@ hat task generate_docs "Write API documentation for the /users endpoints" \ # Plan a migration hat task plan "Plan a migration from REST to GraphQL for the orders service" \ - --repo myorg/myapp + --repo myorg/myapp --category plans --genre migration --project orders-service # Generate tests for a module hat task test "Write unit tests for auth.py covering edge cases and error paths" \ @@ -210,7 +210,24 @@ hat status 3. 
The task runner selects the right hats and models for the job 4. Primary hat generates the deliverable, supporting hats review/enhance it 5. Gold Hat does final QA -6. Results are posted back to your project's PR/issue as a comment +6. Results are written into a sandboxed playground tree on the runner using `category/genre/project/run-id` +7. Results are posted back to your project's PR/issue as a comment and uploaded as artifacts + +**Playground layout:** + +```text +/tmp/hats-playground/ +└── <category>/ + └── <genre>/ + └── <project>/ + └── <run-id>/ + ├── generated files... + ├── HATS_TASK_SUMMARY.md + ├── hats_task_result.json + └── PLAYGROUND_MANIFEST.json +``` + +If you do not pass `--category`, `--genre`, or `--project`, Hat Stack infers sensible defaults and creates the folders automatically. **For Copilot in VS Code:** Your Copilot agent can shell out to `hat task ...` commands. The `gh` CLI handles auth, and hat_stack handles execution. Your Copilot agent gives the instruction, hat_stack's model pool does the heavy lifting, results come back to the PR. 
diff --git a/README.md b/README.md index e712b50..a092f3c 100644 --- a/README.md +++ b/README.md @@ -190,7 +190,8 @@ cp scripts/hat /usr/local/bin/hat # or add scripts/ to PATH export HAT_STACK_REPO="YOUR_USERNAME/hat_stack" # Generate code -hat task generate_code "Build a FastAPI auth module with JWT" --repo myorg/app --pr 42 +hat task generate_code "Build a FastAPI auth module with JWT" \ + --repo myorg/app --pr 42 --category code --genre api --project auth-service # Write documentation hat task generate_docs "Write API docs for /users endpoints" --repo myorg/app --issue 10 @@ -208,6 +209,13 @@ hat task analyze "Security audit of payment processing" --repo myorg/payments git diff main | hat review - --repo myorg/app --pr 123 ``` +Task runs now support a structured playground sandbox on the runner: + +- Default workspace root: `/tmp/hats-playground` +- Layout: `<workspace_root>/<category>/<genre>/<project>/<run-id>/` +- Contents: generated files, `HATS_TASK_SUMMARY.md`, `hats_task_result.json`, `PLAYGROUND_MANIFEST.json` +- Persistence: both the run output and the full playground tree are uploaded as workflow artifacts + Or dispatch directly via `gh` CLI (what your Copilot agent would call): ```bash @@ -275,4 +283,3 @@ hat_stack/ ## License MIT — See [LICENSE](LICENSE). 
- diff --git a/scripts/hat b/scripts/hat index 5b978ad..ce45814 100755 --- a/scripts/hat +++ b/scripts/hat @@ -9,7 +9,8 @@ # # Usage: # hat review [--repo owner/repo] [--pr 42] -# hat task "" [--repo owner/repo] [--pr 42] [--hats black,green] +# hat task "" [--repo owner/repo] [--pr 42] [--hats black,green] \ +# [--category code] [--genre api] [--project auth-service] # hat status [run_id] # hat list-tasks # @@ -19,7 +20,7 @@ # # # Generate a new module # hat task generate_code "Build a FastAPI auth module with JWT and refresh tokens" \ -# --repo myorg/myapp --pr 123 +# --repo myorg/myapp --pr 123 --category code --genre api --project auth-service # # # Write docs for an existing module # hat task generate_docs "Write API documentation for the /users endpoints" \ @@ -174,6 +175,7 @@ cmd_task() { local task_type="$1"; shift local prompt="$1"; shift local callback_repo="" callback_pr="" callback_issue="" hats="" context="" + local category="" genre="" project="" while [[ $# -gt 0 ]]; do case "$1" in @@ -182,6 +184,9 @@ cmd_task() { --issue) callback_issue="$2"; shift 2 ;; --hats) hats="$2"; shift 2 ;; --context) context="$2"; shift 2 ;; + --category) category="$2"; shift 2 ;; + --genre) genre="$2"; shift 2 ;; + --project) project="$2"; shift 2 ;; *) die "Unknown option: $1" ;; esac done @@ -199,6 +204,7 @@ cmd_task() { payload=$(HAT_TASK="$task_type" HAT_PROMPT="$prompt" \ HAT_CB_REPO="$callback_repo" HAT_CB_PR="$callback_pr" \ HAT_CB_ISSUE="$callback_issue" HAT_HATS="$hats" HAT_CTX="$context" \ + HAT_CATEGORY="$category" HAT_GENRE="$genre" HAT_PROJECT="$project" \ python3 -c " import json, os payload = { @@ -209,6 +215,9 @@ payload = { 'callback_issue': os.environ.get('HAT_CB_ISSUE', ''), 'hats': os.environ.get('HAT_HATS', ''), 'context': os.environ.get('HAT_CTX', ''), + 'category': os.environ.get('HAT_CATEGORY', ''), + 'genre': os.environ.get('HAT_GENRE', ''), + 'project': os.environ.get('HAT_PROJECT', ''), } payload = {k: v for k, v in payload.items() if v} 
print(json.dumps(payload)) @@ -250,7 +259,7 @@ cmd_list_tasks() { echo "" echo -e "${BOLD}Usage:${NC}" echo ' hat task generate_code "Build a user auth module" --repo myorg/myapp --pr 42' - echo ' hat task plan "Plan migration to microservices" --repo myorg/monolith' + echo ' hat task plan "Plan migration to microservices" --repo myorg/monolith --category plans --genre migration --project orders' } cmd_help() { @@ -269,10 +278,13 @@ cmd_help() { echo " --issue Issue to post results to" echo " --hats Specific hats to use" echo " --context Additional context" + echo " --category Playground category label" + echo " --genre Playground genre/type label" + echo " --project Playground project label" echo "" echo -e "${BOLD}Examples:${NC}" echo ' git diff main | hat review - --repo myorg/app --pr 42' - echo ' hat task generate_code "Build JWT auth module" --repo myorg/app --pr 42' + echo ' hat task generate_code "Build JWT auth module" --repo myorg/app --pr 42 --category code --genre api --project auth' echo ' hat task generate_docs "Write API docs for /users" --repo myorg/app --issue 10' echo ' hat task plan "Plan GraphQL migration" --repo myorg/app' echo ' hat task analyze "Security audit of payments" --repo myorg/payments' diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py index 13bc9e4..41b88c3 100644 --- a/scripts/hats_task_runner.py +++ b/scripts/hats_task_runner.py @@ -32,6 +32,7 @@ import argparse import json import os +import re import sys import time from concurrent.futures import ThreadPoolExecutor, as_completed @@ -89,6 +90,15 @@ }, } +DEFAULT_CATEGORIES = { + "generate_code": "code", + "generate_docs": "docs", + "refactor": "code", + "analyze": "analysis", + "plan": "plans", + "test": "tests", +} + # --------------------------------------------------------------------------- # Task-mode system prompts — transform hats from reviewers to builders # --------------------------------------------------------------------------- @@ -136,6 +146,210 
@@ def load_config(config_path: str | Path) -> dict: return yaml.safe_load(fh) +def slugify_path_component(value: str | None, default: str) -> str: + """Normalize workspace path components for predictable human-readable folders.""" + text = (value or "").strip().lower() + text = re.sub(r"[^a-z0-9._-]+", "-", text) + text = re.sub(r"-{2,}", "-", text).strip(".-") + return text or default + + +def infer_project_slug(source_repo: str | None, task_type: str) -> str: + """Choose a stable project folder name.""" + if source_repo: + repo_name = source_repo.rsplit("/", 1)[-1] + return slugify_path_component(repo_name, "project") + return slugify_path_component(task_type, "adhoc") + + +def build_run_id(explicit_run_id: str | None = None) -> str: + """Build a deterministic run id for workspace storage.""" + if explicit_run_id: + return slugify_path_component(explicit_run_id, "run") + + github_run_id = os.environ.get("GITHUB_RUN_ID", "").strip() + github_attempt = os.environ.get("GITHUB_RUN_ATTEMPT", "").strip() + if github_run_id: + attempt_suffix = f"-attempt-{github_attempt}" if github_attempt else "" + return f"run-{slugify_path_component(github_run_id, 'run')}{attempt_suffix}" + + return time.strftime("run-%Y%m%d-%H%M%S", time.gmtime()) + + +def resolve_workspace_root(workspace_root: str | None) -> Path | None: + """Resolve and validate the optional workspace root.""" + if not workspace_root: + return None + return Path(workspace_root).expanduser().resolve() + + +def ensure_path_within_root(root: Path, candidate: Path) -> Path: + """Ensure candidate resolves inside the declared sandbox root.""" + resolved = candidate.resolve() + try: + resolved.relative_to(root) + except ValueError as exc: + raise ValueError(f"Path escapes sandbox root: {candidate}") from exc + return resolved + + +def safe_output_path(output_dir: Path, relative_path: str) -> Path: + """Return a safe output path within the output directory.""" + rel = Path(relative_path) + if rel.is_absolute() or not 
relative_path.strip(): + raise ValueError(f"Unsafe generated file path: {relative_path!r}") + if any(part in ("..", "") for part in rel.parts): + raise ValueError(f"Unsafe generated file path: {relative_path!r}") + return ensure_path_within_root(output_dir.resolve(), output_dir / rel) + + +def prepare_workspace( + task_type: str, + workspace_root: str | None = None, + category: str | None = None, + genre: str | None = None, + project: str | None = None, + run_id: str | None = None, + source_repo: str | None = None, + explicit_output_dir: str | None = None, +) -> dict: + """Prepare sandbox workspace metadata and return resolved output path.""" + resolved_workspace_root = resolve_workspace_root(workspace_root) + if not resolved_workspace_root: + output_dir = Path(explicit_output_dir or "/tmp/hats-task-output").expanduser().resolve() + return { + "workspace_root": None, + "output_dir": output_dir, + "category": None, + "genre": None, + "project": None, + "run_id": None, + } + + normalized_category = slugify_path_component( + category, DEFAULT_CATEGORIES.get(task_type, "misc") + ) + normalized_genre = slugify_path_component(genre, "general") + normalized_project = slugify_path_component( + project, infer_project_slug(source_repo, task_type) + ) + normalized_run_id = build_run_id(run_id) + + output_dir = ensure_path_within_root( + resolved_workspace_root, + resolved_workspace_root / normalized_category / normalized_genre / normalized_project / normalized_run_id, + ) + output_dir.mkdir(parents=True, exist_ok=True) + + return { + "workspace_root": resolved_workspace_root, + "output_dir": output_dir, + "category": normalized_category, + "genre": normalized_genre, + "project": normalized_project, + "run_id": normalized_run_id, + } + + +def build_run_manifest( + task_result: dict, + prompt: str, + requested_hats: list[str] | None, + source_repo: str | None, + source_pr: str | None, + source_issue: str | None, + workspace_info: dict, +) -> dict: + """Build manifest metadata 
for a sandboxed task run.""" + output_dir = workspace_info["output_dir"] + files = [ + { + "path": entry["path"], + "description": entry.get("description", ""), + "absolute_path": str((output_dir / entry["path"]).resolve()), + } + for entry in task_result.get("files", []) + ] + + return { + "schema_version": 1, + "created_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + "status": "completed", + "task_type": task_result["task_type"], + "prompt": prompt, + "requested_hats": requested_hats or [], + "primary_hat": task_result["primary_hat"], + "summary": task_result["summary"], + "notes": task_result.get("notes", []), + "source": { + "repo": source_repo or "", + "pr": source_pr or "", + "issue": source_issue or "", + }, + "workspace": { + "root": str(workspace_info["workspace_root"]) if workspace_info["workspace_root"] else "", + "category": workspace_info["category"] or "", + "genre": workspace_info["genre"] or "", + "project": workspace_info["project"] or "", + "run_id": workspace_info["run_id"] or "", + "output_dir": str(output_dir), + }, + "generated_files": files, + "stats": task_result.get("stats", {}), + } + + +def write_workspace_indexes(workspace_root: Path): + """Write human-readable indexes for the sandbox workspace.""" + categories = [] + for category_dir in sorted(p for p in workspace_root.iterdir() if p.is_dir()): + category_lines = [f"# {category_dir.name} playground index", ""] + category_projects = [] + + for genre_dir in sorted(p for p in category_dir.iterdir() if p.is_dir()): + for project_dir in sorted(p for p in genre_dir.iterdir() if p.is_dir()): + runs = sorted(p for p in project_dir.iterdir() if p.is_dir()) + if not runs: + continue + latest = runs[-1] + category_projects.append((genre_dir.name, project_dir.name, latest.name, len(runs))) + + if category_projects: + category_lines.append("| Genre | Project | Latest Run | Runs |") + category_lines.append("|-------|---------|------------|------|") + for genre_name, project_name, 
latest_run, run_count in category_projects: + category_lines.append( + f"| {genre_name} | {project_name} | `{latest_run}` | {run_count} |" + ) + category_lines.append("") + else: + category_lines.append("_No projects yet._") + category_lines.append("") + + (category_dir / "CATEGORY_INDEX.md").write_text( + "\n".join(category_lines), encoding="utf-8" + ) + categories.append((category_dir.name, len(category_projects))) + + root_lines = ["# Hats Playground Index", ""] + if categories: + root_lines.append("| Category | Projects |") + root_lines.append("|----------|----------|") + for category_name, project_count in categories: + root_lines.append(f"| {category_name} | {project_count} |") + root_lines.append("") + root_lines.append( + "Folder layout: `playground/////`" + ) + else: + root_lines.append("_No playground runs yet._") + root_lines.append("") + + (workspace_root / "PLAYGROUND_INDEX.md").write_text( + "\n".join(root_lines), encoding="utf-8" + ) + + def call_ollama(config: dict, model: str, system_prompt: str, user_prompt: str, temperature: float = 0.3, max_tokens: int = 8192, timeout: int = 300) -> dict: @@ -445,13 +659,22 @@ def run_task_pipeline(config: dict, task_type: str, user_prompt: str, } -def write_output_files(task_result: dict, output_dir: str): +def write_output_files( + task_result: dict, + output_dir: str | Path, + workspace_info: dict | None = None, + prompt: str = "", + requested_hats: list[str] | None = None, + source_repo: str | None = None, + source_pr: str | None = None, + source_issue: str | None = None, +): """Write generated files to the output directory.""" - out = Path(output_dir) + out = Path(output_dir).resolve() out.mkdir(parents=True, exist_ok=True) for file_entry in task_result.get("files", []): - filepath = out / file_entry["path"] + filepath = safe_output_path(out, file_entry["path"]) filepath.parent.mkdir(parents=True, exist_ok=True) filepath.write_text(file_entry["content"], encoding="utf-8") print(f" šŸ“„ {filepath}", 
file=sys.stderr) @@ -487,6 +710,21 @@ def write_output_files(task_result: dict, output_dir: str): json_path.write_text(json.dumps(task_result, indent=2), encoding="utf-8") print(f" šŸ“Š {json_path}", file=sys.stderr) + if workspace_info and workspace_info.get("workspace_root"): + manifest = build_run_manifest( + task_result, + prompt=prompt, + requested_hats=requested_hats, + source_repo=source_repo, + source_pr=source_pr, + source_issue=source_issue, + workspace_info=workspace_info, + ) + manifest_path = out / "PLAYGROUND_MANIFEST.json" + manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8") + print(f" šŸ—‚ļø {manifest_path}", file=sys.stderr) + write_workspace_indexes(workspace_info["workspace_root"]) + # --------------------------------------------------------------------------- # CLI entry point @@ -525,9 +763,61 @@ def main(): "--json-file", default=None, help="Path to write JSON result (in addition to output dir)" ) + parser.add_argument( + "--workspace-root", default=None, + help="Optional sandbox root for structured playground storage" + ) + parser.add_argument( + "--category", default=None, + help="Optional playground category (default: inferred from task type)" + ) + parser.add_argument( + "--genre", default=None, + help="Optional playground genre/type bucket" + ) + parser.add_argument( + "--project", default=None, + help="Optional playground project slug" + ) + parser.add_argument( + "--run-id", default=None, + help="Optional run id folder name inside the playground project" + ) + parser.add_argument( + "--source-repo", default=None, + help="Source repo for manifest metadata" + ) + parser.add_argument( + "--source-pr", default=None, + help="Source PR number for manifest metadata" + ) + parser.add_argument( + "--source-issue", default=None, + help="Source issue number for manifest metadata" + ) args = parser.parse_args() + workspace_info = prepare_workspace( + task_type=args.task, + workspace_root=args.workspace_root, + 
category=args.category, + genre=args.genre, + project=args.project, + run_id=args.run_id, + source_repo=args.source_repo, + explicit_output_dir=args.output, + ) + output_dir = workspace_info["output_dir"] + + github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + with open(github_output, "a", encoding="utf-8") as fh: + fh.write(f"output_dir={output_dir}\n") + fh.write( + f"workspace_root={workspace_info['workspace_root'] or ''}\n" + ) + # Preflight api_key = os.environ.get("OLLAMA_API_KEY", "").strip() if not api_key: @@ -554,7 +844,7 @@ def main(): requested_hats = None if args.hats: - requested_hats = [h.strip() for h in args.hats.split(",")] + requested_hats = [h.strip() for h in args.hats.split(",") if h.strip()] # Run the task result = run_task_pipeline( @@ -564,21 +854,28 @@ def main(): ) # Write outputs - print(f"\nšŸ“¦ Writing output to {args.output}/", file=sys.stderr) - write_output_files(result, args.output) + print(f"\nšŸ“¦ Writing output to {output_dir}/", file=sys.stderr) + write_output_files( + result, + output_dir, + workspace_info=workspace_info, + prompt=args.prompt, + requested_hats=requested_hats, + source_repo=args.source_repo, + source_pr=args.source_pr, + source_issue=args.source_issue, + ) if args.json_file: with open(args.json_file, "w", encoding="utf-8") as fh: json.dump(result, fh, indent=2) # GitHub Actions outputs - github_output = os.environ.get("GITHUB_OUTPUT") if github_output: with open(github_output, "a", encoding="utf-8") as fh: fh.write(f"task_type={result['task_type']}\n") fh.write(f"files_generated={len(result['files'])}\n") fh.write(f"hats_executed={result['stats']['hats_executed']}\n") - fh.write(f"output_dir={args.output}\n") print(f"\nāœ… Task complete: {len(result['files'])} files generated, " f"{result['stats']['hats_executed']} hats used", file=sys.stderr) From 3ee1595a48b5cae3596b09c3fee5c478b8dc6e54 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: 
Fri, 10 Apr 2026 00:34:43 +0000 Subject: [PATCH 02/10] Add task playground sandbox and model fallbacks Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464 Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com> --- FORK_SETUP.md | 2 ++ README.md | 1 + scripts/hats_task_runner.py | 72 ++++++++++++++++++++++++++++++------- 3 files changed, 62 insertions(+), 13 deletions(-) diff --git a/FORK_SETUP.md b/FORK_SETUP.md index d703011..0c0abc0 100644 --- a/FORK_SETUP.md +++ b/FORK_SETUP.md @@ -229,6 +229,8 @@ hat status If you do not pass `--category`, `--genre`, or `--project`, Hat Stack infers sensible defaults and creates the folders automatically. +If the first Ollama model fails during task mode, Hat Stack automatically retries comparable configured fallback models before giving up. + **For Copilot in VS Code:** Your Copilot agent can shell out to `hat task ...` commands. The `gh` CLI handles auth, and hat_stack handles execution. Your Copilot agent gives the instruction, hat_stack's model pool does the heavy lifting, results come back to the PR. 
--- diff --git a/README.md b/README.md index a092f3c..83efc45 100644 --- a/README.md +++ b/README.md @@ -215,6 +215,7 @@ Task runs now support a structured playground sandbox on the runner: - Layout: `/////` - Contents: generated files, `HATS_TASK_SUMMARY.md`, `hats_task_result.json`, `PLAYGROUND_MANIFEST.json` - Persistence: both the run output and the full playground tree are uploaded as workflow artifacts +- Resilience: if the first Ollama model fails, task mode retries comparable configured fallback models automatically Or dispatch directly via `gh` CLI (what your Copilot agent would call): diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py index 41b88c3..c7e5a95 100644 --- a/scripts/hats_task_runner.py +++ b/scripts/hats_task_runner.py @@ -428,6 +428,42 @@ def select_model_for_task(config: dict, hat_id: str, task_type: str) -> str: return hat_def.get("primary_model", "nemotron-3-super") +def build_comparable_model_sequence( + config: dict, + primary_model: str, + fallback_model: str | None = None, +) -> list[str]: + """Build a prioritized model fallback list using comparable configured tiers.""" + models_cfg = config.get("models", {}) + seen = set() + ordered_models = [] + + def add(model_name: str | None): + if model_name and model_name in models_cfg and model_name not in seen: + ordered_models.append(model_name) + seen.add(model_name) + + add(primary_model) + add(fallback_model) + + primary_tier = models_cfg.get(primary_model, {}).get("tier") + fallback_tier = models_cfg.get(fallback_model, {}).get("tier") if fallback_model else None + + for model_name, model_meta in models_cfg.items(): + if model_meta.get("tier") == primary_tier: + add(model_name) + + if fallback_tier and fallback_tier != primary_tier: + for model_name, model_meta in models_cfg.items(): + if model_meta.get("tier") == fallback_tier: + add(model_name) + + for model_name in models_cfg: + add(model_name) + + return ordered_models + + def build_task_prompt(config: dict, 
hat_id: str, task_type: str, user_prompt: str, context_files: dict | None = None) -> tuple[str, str]: """Build the system and user prompts for a task execution. @@ -466,29 +502,34 @@ def run_task_hat(config: dict, hat_id: str, task_type: str, ) start = time.time() - result = call_ollama( - config, model, system_prompt, full_user_prompt, - temperature=hat_def.get("temperature", 0.3), - max_tokens=8192, # Task mode needs more output room - timeout=hat_def.get("timeout_seconds", 300), - ) - elapsed = time.time() - start - - # Try fallback if primary fails - if result["error"] and hat_def.get("fallback_model"): + attempted_models = [] + result = { + "error": "No model attempts executed", + "model": model, + "content": None, + "usage": {"input": 0, "output": 0}, + } + for candidate_model in build_comparable_model_sequence( + config, model, hat_def.get("fallback_model") + ): + attempted_models.append(candidate_model) result = call_ollama( - config, hat_def["fallback_model"], system_prompt, full_user_prompt, + config, candidate_model, system_prompt, full_user_prompt, temperature=hat_def.get("temperature", 0.3), - max_tokens=8192, + max_tokens=8192, # Task mode needs more output room timeout=hat_def.get("timeout_seconds", 300), ) - elapsed = time.time() - start + if not result["error"]: + break + + elapsed = time.time() - start report = { "hat_id": hat_id, "hat_name": hat_def.get("name", hat_id), "emoji": hat_def.get("emoji", "šŸŽ©"), "model_used": result["model"], + "attempted_models": attempted_models, "latency_seconds": round(elapsed, 2), "token_usage": result["usage"], "error": result["error"], @@ -512,6 +553,11 @@ def run_task_hat(config: dict, hat_id: str, task_type: str, }] report["summary"] = "Model returned unstructured output" + if len(attempted_models) > 1 and report["model_used"] != attempted_models[0]: + report["notes"].append( + f"Primary model fallback used: {attempted_models[0]} → {report['model_used']}" + ) + return report From 
c483be25fd7ef2b12e2bf107d372cce55cb7b4d9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 00:36:20 +0000 Subject: [PATCH 03/10] Refine task sandbox validation fixes Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464 Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com> --- .github/workflows/hats-task.yml | 4 ++-- scripts/hats_task_runner.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml index 8029ac3..3e1e8af 100644 --- a/.github/workflows/hats-task.yml +++ b/.github/workflows/hats-task.yml @@ -155,8 +155,8 @@ jobs: f.write(f\"project={sanitize(payload.get('project', ''))}\n\") f.write(f\"workspace_root={sanitize(payload.get('workspace_root', '/tmp/hats-playground'))}\n\") f.write(f\"context={sanitize(payload.get('context', ''))}\n\") - " - fi + " + fi - name: Fetch context from callback repo id: context diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py index c7e5a95..24df3dc 100644 --- a/scripts/hats_task_runner.py +++ b/scripts/hats_task_runner.py @@ -34,6 +34,7 @@ import os import re import sys +import tempfile import time from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path @@ -198,7 +199,7 @@ def safe_output_path(output_dir: Path, relative_path: str) -> Path: rel = Path(relative_path) if rel.is_absolute() or not relative_path.strip(): raise ValueError(f"Unsafe generated file path: {relative_path!r}") - if any(part in ("..", "") for part in rel.parts): + if any(part == ".." 
for part in rel.parts): raise ValueError(f"Unsafe generated file path: {relative_path!r}") return ensure_path_within_root(output_dir.resolve(), output_dir / rel) @@ -216,7 +217,12 @@ def prepare_workspace( """Prepare sandbox workspace metadata and return resolved output path.""" resolved_workspace_root = resolve_workspace_root(workspace_root) if not resolved_workspace_root: - output_dir = Path(explicit_output_dir or "/tmp/hats-task-output").expanduser().resolve() + default_output_dir = ( + Path(tempfile.gettempdir()) + / f"hats-task-output-{build_run_id()}-{os.getpid()}" + ) + output_dir = Path(explicit_output_dir or default_output_dir).expanduser().resolve() + output_dir.mkdir(parents=True, exist_ok=True) return { "workspace_root": None, "output_dir": output_dir, @@ -504,7 +510,7 @@ def run_task_hat(config: dict, hat_id: str, task_type: str, start = time.time() attempted_models = [] result = { - "error": "No model attempts executed", + "error": "All model attempts failed", "model": model, "content": None, "usage": {"input": 0, "output": 0}, From 494a2fdee4123060f466f7b62613d00245e5de57 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 00:37:21 +0000 Subject: [PATCH 04/10] Clarify fallback note logic Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464 Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com> --- scripts/hats_task_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py index 24df3dc..e2b5f0b 100644 --- a/scripts/hats_task_runner.py +++ b/scripts/hats_task_runner.py @@ -559,7 +559,8 @@ def run_task_hat(config: dict, hat_id: str, task_type: str, }] report["summary"] = "Model returned unstructured output" - if len(attempted_models) > 1 and report["model_used"] != attempted_models[0]: + used_fallback_model = 
len(attempted_models) > 1 and attempted_models[0] != report["model_used"] + if used_fallback_model: report["notes"].append( f"Primary model fallback used: {attempted_models[0]} → {report['model_used']}" ) From 11e3f69fdbaef99bbbf176bac75ee6fb24710909 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 00:38:22 +0000 Subject: [PATCH 05/10] Address final review feedback Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464 Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com> --- .github/workflows/hats-task.yml | 2 +- scripts/hats_task_runner.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml index 3e1e8af..8d8a838 100644 --- a/.github/workflows/hats-task.yml +++ b/.github/workflows/hats-task.yml @@ -156,7 +156,7 @@ jobs: f.write(f\"workspace_root={sanitize(payload.get('workspace_root', '/tmp/hats-playground'))}\n\") f.write(f\"context={sanitize(payload.get('context', ''))}\n\") " - fi + fi - name: Fetch context from callback repo id: context diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py index e2b5f0b..2372330 100644 --- a/scripts/hats_task_runner.py +++ b/scripts/hats_task_runner.py @@ -196,8 +196,10 @@ def ensure_path_within_root(root: Path, candidate: Path) -> Path: def safe_output_path(output_dir: Path, relative_path: str) -> Path: """Return a safe output path within the output directory.""" + if not relative_path.strip(): + raise ValueError(f"Unsafe generated file path: {relative_path!r}") rel = Path(relative_path) - if rel.is_absolute() or not relative_path.strip(): + if rel.is_absolute(): raise ValueError(f"Unsafe generated file path: {relative_path!r}") if any(part == ".." 
for part in rel.parts): raise ValueError(f"Unsafe generated file path: {relative_path!r}") @@ -509,6 +511,7 @@ def run_task_hat(config: dict, hat_id: str, task_type: str, start = time.time() attempted_models = [] + fallback_used = False result = { "error": "All model attempts failed", "model": model, @@ -526,6 +529,7 @@ def run_task_hat(config: dict, hat_id: str, task_type: str, timeout=hat_def.get("timeout_seconds", 300), ) if not result["error"]: + fallback_used = candidate_model != attempted_models[0] break elapsed = time.time() - start @@ -559,8 +563,7 @@ def run_task_hat(config: dict, hat_id: str, task_type: str, }] report["summary"] = "Model returned unstructured output" - used_fallback_model = len(attempted_models) > 1 and attempted_models[0] != report["model_used"] - if used_fallback_model: + if fallback_used: report["notes"].append( f"Primary model fallback used: {attempted_models[0]} → {report['model_used']}" ) From 32cf043e5a0739405105954a6c33c684543fce3d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 00:39:30 +0000 Subject: [PATCH 06/10] Tighten fallback and docs details Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464 Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com> --- README.md | 4 +++- scripts/hat | 2 +- scripts/hats_task_runner.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 83efc45..c396278 100644 --- a/README.md +++ b/README.md @@ -211,12 +211,14 @@ git diff main | hat review - --repo myorg/app --pr 123 Task runs now support a structured playground sandbox on the runner: -- Default workspace root: `/tmp/hats-playground` +- Workflow default workspace root: `/tmp/hats-playground` - Layout: `/////` - Contents: generated files, `HATS_TASK_SUMMARY.md`, `hats_task_result.json`, `PLAYGROUND_MANIFEST.json` - Persistence: both the run 
output and the full playground tree are uploaded as workflow artifacts - Resilience: if the first Ollama model fails, task mode retries comparable configured fallback models automatically +If no workspace root is provided outside the workflow, task mode falls back to a unique temporary output directory under `/tmp`. + Or dispatch directly via `gh` CLI (what your Copilot agent would call): ```bash diff --git a/scripts/hat b/scripts/hat index ce45814..1dc35d5 100755 --- a/scripts/hat +++ b/scripts/hat @@ -213,7 +213,7 @@ payload = { 'callback_repo': os.environ.get('HAT_CB_REPO', ''), 'callback_pr': os.environ.get('HAT_CB_PR', ''), 'callback_issue': os.environ.get('HAT_CB_ISSUE', ''), - 'hats': os.environ.get('HAT_HATS', ''), + 'hats': ','.join([h.strip() for h in os.environ.get('HAT_HATS', '').split(',') if h.strip()]), 'context': os.environ.get('HAT_CTX', ''), 'category': os.environ.get('HAT_CATEGORY', ''), 'genre': os.environ.get('HAT_GENRE', ''), diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py index 2372330..3525e50 100644 --- a/scripts/hats_task_runner.py +++ b/scripts/hats_task_runner.py @@ -171,7 +171,7 @@ def build_run_id(explicit_run_id: str | None = None) -> str: github_run_id = os.environ.get("GITHUB_RUN_ID", "").strip() github_attempt = os.environ.get("GITHUB_RUN_ATTEMPT", "").strip() if github_run_id: - attempt_suffix = f"-attempt-{github_attempt}" if github_attempt else "" + attempt_suffix = f"-attempt-{github_attempt}" if github_attempt != "" else "" return f"run-{slugify_path_component(github_run_id, 'run')}{attempt_suffix}" return time.strftime("run-%Y%m%d-%H%M%S", time.gmtime()) @@ -529,7 +529,7 @@ def run_task_hat(config: dict, hat_id: str, task_type: str, timeout=hat_def.get("timeout_seconds", 300), ) if not result["error"]: - fallback_used = candidate_model != attempted_models[0] + fallback_used = candidate_model != model break elapsed = time.time() - start From 1c30f4506ea10788d0e516708581553bd21f1277 Mon Sep 17 00:00:00 
2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 00:40:24 +0000 Subject: [PATCH 07/10] Clean up fallback validation details Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/5e1526c3-bd4e-48fc-a842-28b9c5065464 Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com> --- scripts/hat | 2 +- scripts/hats_task_runner.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/hat b/scripts/hat index 1dc35d5..ce45814 100755 --- a/scripts/hat +++ b/scripts/hat @@ -213,7 +213,7 @@ payload = { 'callback_repo': os.environ.get('HAT_CB_REPO', ''), 'callback_pr': os.environ.get('HAT_CB_PR', ''), 'callback_issue': os.environ.get('HAT_CB_ISSUE', ''), - 'hats': ','.join([h.strip() for h in os.environ.get('HAT_HATS', '').split(',') if h.strip()]), + 'hats': os.environ.get('HAT_HATS', ''), 'context': os.environ.get('HAT_CTX', ''), 'category': os.environ.get('HAT_CATEGORY', ''), 'genre': os.environ.get('HAT_GENRE', ''), diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py index 3525e50..c19b959 100644 --- a/scripts/hats_task_runner.py +++ b/scripts/hats_task_runner.py @@ -171,7 +171,7 @@ def build_run_id(explicit_run_id: str | None = None) -> str: github_run_id = os.environ.get("GITHUB_RUN_ID", "").strip() github_attempt = os.environ.get("GITHUB_RUN_ATTEMPT", "").strip() if github_run_id: - attempt_suffix = f"-attempt-{github_attempt}" if github_attempt != "" else "" + attempt_suffix = f"-attempt-{github_attempt}" if github_attempt else "" return f"run-{slugify_path_component(github_run_id, 'run')}{attempt_suffix}" return time.strftime("run-%Y%m%d-%H%M%S", time.gmtime()) @@ -197,7 +197,7 @@ def ensure_path_within_root(root: Path, candidate: Path) -> Path: def safe_output_path(output_dir: Path, relative_path: str) -> Path: """Return a safe output path within the output directory.""" if not relative_path.strip(): - raise ValueError(f"Unsafe 
generated file path: {relative_path!r}") + raise ValueError("Empty file path is not allowed") rel = Path(relative_path) if rel.is_absolute(): raise ValueError(f"Unsafe generated file path: {relative_path!r}") @@ -511,7 +511,6 @@ def run_task_hat(config: dict, hat_id: str, task_type: str, start = time.time() attempted_models = [] - fallback_used = False result = { "error": "All model attempts failed", "model": model, @@ -529,10 +528,10 @@ def run_task_hat(config: dict, hat_id: str, task_type: str, timeout=hat_def.get("timeout_seconds", 300), ) if not result["error"]: - fallback_used = candidate_model != model break elapsed = time.time() - start + fallback_used = not result["error"] and result["model"] != model report = { "hat_id": hat_id, From 646f11e50421354ffffb812e2523de48258b798e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 01:00:41 +0000 Subject: [PATCH 08/10] Fix review-thread task workspace issues Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/4d97ddbd-e0d9-4fe7-b2b0-42f70dc12d65 Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com> --- .github/workflows/hats-task.yml | 35 +++++++++++++++++++++------------ scripts/hats_task_runner.py | 21 ++++++++++++++++---- 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml index 8d8a838..d0dc176 100644 --- a/.github/workflows/hats-task.yml +++ b/.github/workflows/hats-task.yml @@ -110,21 +110,30 @@ jobs: if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then # workflow_dispatch inputs are controlled by the UI schema — safe echo "task=${{ inputs.task }}" >> "$GITHUB_OUTPUT" - echo "hats=${{ inputs.hats }}" >> "$GITHUB_OUTPUT" - echo "callback_repo=${{ inputs.callback_repo }}" >> "$GITHUB_OUTPUT" - echo "callback_pr=${{ inputs.callback_pr }}" >> "$GITHUB_OUTPUT" - echo "callback_issue=${{ inputs.callback_issue 
}}" >> "$GITHUB_OUTPUT" - echo "category=${{ inputs.category }}" >> "$GITHUB_OUTPUT" - echo "genre=${{ inputs.genre }}" >> "$GITHUB_OUTPUT" - echo "project=${{ inputs.project }}" >> "$GITHUB_OUTPUT" - echo "workspace_root=${{ inputs.workspace_root }}" >> "$GITHUB_OUTPUT" - # Prompt may contain special chars — pass via env to avoid code injection - INPUT_PROMPT="${{ inputs.prompt }}" python3 -c " + # String inputs may contain newlines — sanitize before writing outputs + INPUT_PROMPT="${{ inputs.prompt }}" \ + INPUT_HATS="${{ inputs.hats }}" \ + INPUT_CALLBACK_REPO="${{ inputs.callback_repo }}" \ + INPUT_CALLBACK_PR="${{ inputs.callback_pr }}" \ + INPUT_CALLBACK_ISSUE="${{ inputs.callback_issue }}" \ + INPUT_CATEGORY="${{ inputs.category }}" \ + INPUT_GENRE="${{ inputs.genre }}" \ + INPUT_PROJECT="${{ inputs.project }}" \ + INPUT_WORKSPACE_ROOT="${{ inputs.workspace_root }}" \ + python3 -c " import os, re - val = os.environ.get('INPUT_PROMPT', '') - sanitized = re.sub(r'[\r\n]', ' ', val).strip() + def sanitize(value): + return re.sub(r'[\r\n]', ' ', value).strip() with open(os.environ['GITHUB_OUTPUT'], 'a') as f: - f.write(f'prompt={sanitized}\n') + f.write(f\"prompt={sanitize(os.environ.get('INPUT_PROMPT', ''))}\n\") + f.write(f\"hats={sanitize(os.environ.get('INPUT_HATS', ''))}\n\") + f.write(f\"callback_repo={sanitize(os.environ.get('INPUT_CALLBACK_REPO', ''))}\n\") + f.write(f\"callback_pr={sanitize(os.environ.get('INPUT_CALLBACK_PR', ''))}\n\") + f.write(f\"callback_issue={sanitize(os.environ.get('INPUT_CALLBACK_ISSUE', ''))}\n\") + f.write(f\"category={sanitize(os.environ.get('INPUT_CATEGORY', ''))}\n\") + f.write(f\"genre={sanitize(os.environ.get('INPUT_GENRE', ''))}\n\") + f.write(f\"project={sanitize(os.environ.get('INPUT_PROJECT', ''))}\n\") + f.write(f\"workspace_root={sanitize(os.environ.get('INPUT_WORKSPACE_ROOT', ''))}\n\") " else # repository_dispatch — extract from client_payload with sanitization diff --git a/scripts/hats_task_runner.py 
b/scripts/hats_task_runner.py index c19b959..c3d9189 100644 --- a/scripts/hats_task_runner.py +++ b/scripts/hats_task_runner.py @@ -164,7 +164,7 @@ def infer_project_slug(source_repo: str | None, task_type: str) -> str: def build_run_id(explicit_run_id: str | None = None) -> str: - """Build a deterministic run id for workspace storage.""" + """Build a run id for workspace storage, preferring GitHub run metadata when available.""" if explicit_run_id: return slugify_path_component(explicit_run_id, "run") @@ -282,7 +282,7 @@ def build_run_manifest( return { "schema_version": 1, "created_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), - "status": "completed", + "status": task_result.get("status", "completed"), "task_type": task_result["task_type"], "prompt": prompt, "requested_hats": requested_hats or [], @@ -316,7 +316,10 @@ def write_workspace_indexes(workspace_root: Path): for genre_dir in sorted(p for p in category_dir.iterdir() if p.is_dir()): for project_dir in sorted(p for p in genre_dir.iterdir() if p.is_dir()): - runs = sorted(p for p in project_dir.iterdir() if p.is_dir()) + runs = sorted( + (p for p in project_dir.iterdir() if p.is_dir()), + key=lambda path: (path.stat().st_mtime, path.name), + ) if not runs: continue latest = runs[-1] @@ -347,7 +350,7 @@ def write_workspace_indexes(workspace_root: Path): root_lines.append(f"| {category_name} | {project_count} |") root_lines.append("") root_lines.append( - "Folder layout: `playground/////`" + f"Folder layout: `{workspace_root}/////`" ) else: root_lines.append("_No playground runs yet._") @@ -692,8 +695,18 @@ def run_task_pipeline(config: dict, task_type: str, user_prompt: str, total_tokens["input"] += r["token_usage"]["input"] total_tokens["output"] += r["token_usage"]["output"] + primary_failed = bool(primary_result["error"]) and not primary_result["files"] + had_any_errors = any(result["error"] for result in all_results) + if primary_failed: + status = "failed" + elif had_any_errors: + status 
= "completed_with_warnings" + else: + status = "completed" + return { "task_type": task_type, + "status": status, "primary_hat": primary_hat, "files": primary_result["files"], "summary": primary_result["summary"], From acfe16ce9e9bb746e039ef3d3b51da89f2d95712 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 01:01:53 +0000 Subject: [PATCH 09/10] Polish review-thread follow-up fixes Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/4d97ddbd-e0d9-4fe7-b2b0-42f70dc12d65 Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com> --- .github/workflows/hats-task.yml | 2 ++ scripts/hats_task_runner.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml index d0dc176..0178378 100644 --- a/.github/workflows/hats-task.yml +++ b/.github/workflows/hats-task.yml @@ -123,6 +123,8 @@ jobs: python3 -c " import os, re def sanitize(value): + if not value: + return '' return re.sub(r'[\r\n]', ' ', value).strip() with open(os.environ['GITHUB_OUTPUT'], 'a') as f: f.write(f\"prompt={sanitize(os.environ.get('INPUT_PROMPT', ''))}\n\") diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py index c3d9189..bb8e767 100644 --- a/scripts/hats_task_runner.py +++ b/scripts/hats_task_runner.py @@ -695,6 +695,8 @@ def run_task_pipeline(config: dict, task_type: str, user_prompt: str, total_tokens["input"] += r["token_usage"]["input"] total_tokens["output"] += r["token_usage"]["output"] + # Treat the task as failed only when the primary generation failed outright; + # supporting/gold hat errors should surface as warnings if deliverables still exist. 
primary_failed = bool(primary_result["error"]) and not primary_result["files"] had_any_errors = any(result["error"] for result in all_results) if primary_failed: From da504f64c27bc334b9d6403557f17e4e5149cfc0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 01:02:58 +0000 Subject: [PATCH 10/10] Finalize review-thread cleanup Agent-Logs-Url: https://github.com/Grumpified-OGGVCT/hat_stack/sessions/4d97ddbd-e0d9-4fe7-b2b0-42f70dc12d65 Co-authored-by: AccidentalJedi <92951150+AccidentalJedi@users.noreply.github.com> --- .github/workflows/hats-task.yml | 5 ++--- scripts/hats_task_runner.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/hats-task.yml b/.github/workflows/hats-task.yml index 0178378..18b3773 100644 --- a/.github/workflows/hats-task.yml +++ b/.github/workflows/hats-task.yml @@ -123,9 +123,8 @@ jobs: python3 -c " import os, re def sanitize(value): - if not value: - return '' - return re.sub(r'[\r\n]', ' ', value).strip() + text = str(value or '') + return re.sub(r'[\r\n]', ' ', text).strip() with open(os.environ['GITHUB_OUTPUT'], 'a') as f: f.write(f\"prompt={sanitize(os.environ.get('INPUT_PROMPT', ''))}\n\") f.write(f\"hats={sanitize(os.environ.get('INPUT_HATS', ''))}\n\") diff --git a/scripts/hats_task_runner.py b/scripts/hats_task_runner.py index bb8e767..f1013ae 100644 --- a/scripts/hats_task_runner.py +++ b/scripts/hats_task_runner.py @@ -697,7 +697,7 @@ def run_task_pipeline(config: dict, task_type: str, user_prompt: str, # Treat the task as failed only when the primary generation failed outright; # supporting/gold hat errors should surface as warnings if deliverables still exist. - primary_failed = bool(primary_result["error"]) and not primary_result["files"] + primary_failed = primary_result["error"] and not primary_result["files"] had_any_errors = any(result["error"] for result in all_results) if primary_failed: status = "failed"