From 06a5a1e6a5a8d8a5c48232b2ecf8aa0205145936 Mon Sep 17 00:00:00 2001
From: Pavle Petrovic <ppetrovic@tenstorrent.com>
Date: Tue, 16 Jun 2026 16:54:51 +0200
Subject: [PATCH 1/4] actions: add run-with-log composite action

Wraps a run step to tee output to a .log and append a completion marker,
so a GitHub timeout-minutes kill (marker absent) is distinguishable from a
crash. Replaces the tee/PIPESTATUS block copied across test workflows.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .github/actions/run-with-log/README.md  | 48 +++++++++++++++++++++++++
 .github/actions/run-with-log/action.yml | 47 ++++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 .github/actions/run-with-log/README.md
 create mode 100644 .github/actions/run-with-log/action.yml
diff --git a/.github/actions/run-with-log/README.md b/.github/actions/run-with-log/README.md
new file mode 100644
index 0000000..ec264f6
--- /dev/null
+++ b/.github/actions/run-with-log/README.md
@@ -0,0 +1,48 @@
+# `run-with-log`
+
+Run a bash script like a normal `run:` step, but capture its combined output to
+a `.log` file and append a completion marker as the final line:
+
+```
+[==log-finish-line==] exit_code=<N>
+```
+
+The marker is a generic end-of-run sentinel. Its **absence** means the shell was
+killed before finishing — most usefully a GitHub `timeout-minutes` kill, which
+leaves no trace in the log otherwise. Tooling that reads the logs (e.g. the
+`ai_summary` parser) uses this to tell a timeout apart from a crash.
+
+Replaces hand-rolled `tee` / `PIPESTATUS` blocks copied across workflows.
+
+## Usage
+
+```yaml
+- name: ${{ matrix.test-group.name }}
+  timeout-minutes: ${{ matrix.test-group.timeout }}
+  uses: tenstorrent/tt-github-actions/.github/actions/run-with-log@main
+  with:
+    log-file: generated/test_logs/${{ matrix.test-group.name }}.log
+    run: |
+      pytest models/demos/... -xv
+      ./some_other_command
+```
+
+## Inputs
+
+| Name | Required | Default | Description |
+|------|----------|---------|-------------|
+| `run` | yes | — | Bash script, exactly as a `run:` step body. |
+| `log-file` | yes | — | Path for the `.log`, relative to `working-directory`. Parent dirs are created. |
+| `working-directory` | no | `/work` | Defaults to the Tenstorrent container workdir; override per call as on a `run:` step. Needed because composite steps don't inherit the job's `defaults.run.working-directory`. |
+
+## What propagates
+
+- **`if:`, `env:`, `timeout-minutes`** on the calling step — native; `timeout-minutes`
+  is what makes the marker meaningful (the kill skips it).
+- **Exit code / pass-fail** — the script's real exit code is forwarded, so the
+  step fails when the script fails and `failure()` / `if:` downstream work.
+- **The script itself** runs under `bash --noprofile --norc -eo pipefail` (same as a `run:` step with `shell: bash`).
+
+**Does not propagate:** step outputs written to `$GITHUB_OUTPUT` inside the script — a
+composite action can only expose outputs it declares, not arbitrary ones. Steps
+that must set outputs should stay plain `run:` steps.
diff --git a/.github/actions/run-with-log/action.yml b/.github/actions/run-with-log/action.yml
new file mode 100644
index 0000000..3979196
--- /dev/null
+++ b/.github/actions/run-with-log/action.yml
@@ -0,0 +1,55 @@
+name: Run with log
+description: >-
+  Run a bash script like a normal `run:` step, tee its combined output to a
+  .log file, and append a completion marker as the final line. The marker is a
+  generic end-of-run sentinel: its absence means the shell was killed before
+  finishing (e.g. GitHub timeout-minutes), which downstream tooling can detect.
+
+inputs:
+  run:
+    description: "Bash script to run, exactly as a `run:` step body (multi-line)."
+    required: true
+  log-file:
+    description: "Path for the .log, relative to working-directory. Parent dirs are created."
+    required: true
+  working-directory:
+    description: >-
+      Directory to run in. Defaults to /work (Tenstorrent container jobs).
+      Override per call as you would on a run: step. Required because composite
+      steps do not inherit the job's defaults.run.working-directory.
+    required: false
+    default: "/work"
+
+runs:
+  using: composite
+  steps:
+    - shell: bash
+      working-directory: ${{ inputs.working-directory }}
+      env:
+        # Via env so the script's quotes/specials reach the file verbatim.
+        RUN_WITH_LOG_SCRIPT: ${{ inputs.run }}
+        RUN_WITH_LOG_FILE: ${{ inputs.log-file }}
+      run: |
+        # errexit off for the wrapper so the marker is written even when the
+        # script fails — a failed run must stay a failure, not look like a kill.
+        set +e
+        mkdir -p "$(dirname "$RUN_WITH_LOG_FILE")"
+
+        # Run as its own bash file, matching a normal run step (errexit +
+        # pipefail), so the script keeps full fidelity and its own exit code.
+        script=$(mktemp)
+        printf '%s\n' "$RUN_WITH_LOG_SCRIPT" > "$script"
+        # tee mirrors the combined output to the console and the log. After it
+        # drains, terminate an unfinished last line (output without a trailing
+        # newline) so the marker gets its own line. PIPESTATUS[0] is unaffected.
+        bash --noprofile --norc -eo pipefail "$script" 2>&1 | {
+          tee "$RUN_WITH_LOG_FILE"
+          if [ -n "$(tail -c 1 "$RUN_WITH_LOG_FILE" 2>/dev/null)" ]; then
+            echo >> "$RUN_WITH_LOG_FILE"
+          fi
+        }
+        rc=${PIPESTATUS[0]}
+        rm -f "$script"
+
+        printf '[==log-finish-line==] exit_code=%s\n' "$rc" >> "$RUN_WITH_LOG_FILE"
+        exit $rc

From f03eb5b1fbf7096472ce306f9265d81f55a54ae1 Mon Sep 17 00:00:00 2001
From: Pavle Petrovic <ppetrovic@tenstorrent.com>
Date: Tue, 16 Jun 2026 17:02:43 +0200
Subject: [PATCH 2/4] test: add dispatch smoke test for run-with-log

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .github/workflows/test-run-with-log.yaml | 64 ++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 .github/workflows/test-run-with-log.yaml

diff --git a/.github/workflows/test-run-with-log.yaml b/.github/workflows/test-run-with-log.yaml
new file mode 100644
index 0000000..e7d0582
--- /dev/null
+++ b/.github/workflows/test-run-with-log.yaml
@@ -0,0 +1,64 @@
+name: "test: run-with-log"
+
+# Smoke test for the run-with-log action: a completed run writes the marker;
+# a timeout-minutes kill does not. Dispatch-only (the timeout case waits ~60s).
+on:
+  workflow_dispatch:
+
+jobs:
+  success-case:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/run-with-log  # the action under test, from this checkout
+        with:
+          working-directory: ${{ github.workspace }}  # override the action's /work default
+          log-file: out/ok.log
+          run: |
+            echo hello
+            echo world
+      - name: Assert marker present, exit 0
+        run: |
+          tail -3 out/ok.log
+          tail -1 out/ok.log | grep -qx '\[==log-finish-line==\] exit_code=0'
+
+  failure-case:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/run-with-log
+        id: run  # read below as steps.run.outcome
+        continue-on-error: true  # keep the job going so the next step can assert on the failure
+        with:
+          working-directory: ${{ github.workspace }}
+          log-file: out/fail.log
+          run: |
+            echo before
+            exit 7
+      - name: Assert failure forwarded, marker carries exit code
+        run: |
+          test "${{ steps.run.outcome }}" = failure
+          tail -1 out/fail.log | grep -qx '\[==log-finish-line==\] exit_code=7'
+
+  timeout-case:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/run-with-log
+        id: run  # read below as steps.run.outcome
+        continue-on-error: true  # keep the job going so the next step can inspect the log
+        timeout-minutes: 1  # shorter than the 120s sleep below, so the step is killed mid-run
+        with:
+          working-directory: ${{ github.workspace }}
+          log-file: out/timeout.log
+          run: |
+            echo started
+            sleep 120
+      - name: Assert killed, marker absent
+        run: |
+          test "${{ steps.run.outcome }}" != success
+          grep -q '^started$' out/timeout.log
+          if grep -q log-finish-line out/timeout.log; then
+            echo "::error::marker present after timeout kill — bug"; exit 1
+          fi
+          echo "marker correctly absent (outcome=${{ steps.run.outcome }})"

From 9bed8b2c148a52115b8e31d46046c9e58c8460a5 Mon Sep 17 00:00:00 2001
From: Pavle Petrovic <ppetrovic@tenstorrent.com>
Date: Wed, 17 Jun 2026 07:47:30 +0200
Subject: [PATCH 3/4] test: assert run-with-log SUCCESS/TIMEOUT classification
 end-to-end

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .github/workflows/test-run-with-log.yaml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/.github/workflows/test-run-with-log.yaml b/.github/workflows/test-run-with-log.yaml
index e7d0582..1931e61 100644
--- a/.github/workflows/test-run-with-log.yaml
+++ b/.github/workflows/test-run-with-log.yaml
@@ -21,6 +21,17 @@ jobs:
         run: |
           tail -3 out/ok.log
           tail -1 out/ok.log | grep -qx '\[==log-finish-line==\] exit_code=0'
+      - name: Summarize (no LLM — clean log short-circuits)
+        uses: ./.github/actions/ai_summary/job
+        with:
+          config: '{"model":"none","workspace":"${{ github.workspace }}","input_dirs":["out"],"output_dir":"out/summaries"}'
+          api-key: ""
+          api-url: ""
+          job-name: smoke-success
+      - name: Assert classified SUCCESS
+        run: |
+          f=$(ls out/summaries/*.json); cat "${f%.json}.md"
+          python3 -c "import json; s=json.load(open('$f'))['_job']['status']; assert s=='SUCCESS', s"
 
   failure-case:
     runs-on: ubuntu-latest
@@ -62,3 +73,14 @@ jobs:
             echo "::error::marker present after timeout kill — bug"; exit 1
           fi
           echo "marker correctly absent (outcome=${{ steps.run.outcome }})"
+      - name: Summarize (no LLM — marker-absent short-circuits to TIMEOUT)
+        uses: ./.github/actions/ai_summary/job
+        with:
+          config: '{"model":"none","workspace":"${{ github.workspace }}","input_dirs":["out"],"output_dir":"out/summaries"}'
+          api-key: ""
+          api-url: ""
+          job-name: smoke-timeout
+      - name: Assert classified TIMEOUT
+        run: |
+          f=$(ls out/summaries/*.json); cat "${f%.json}.md"
+          python3 -c "import json; s=json.load(open('$f'))['_job']['status']; assert s=='TIMEOUT', s"

From c2ab7869cc6e4fcb7ea7eaad7017d29e2aff1845 Mon Sep 17 00:00:00 2001
From: Pavle Petrovic <ppetrovic@tenstorrent.com>
Date: Wed, 17 Jun 2026 14:38:34 +0200
Subject: [PATCH 4/4] run-with-log: namespace marker token as
 tt-log-finish-line

---
 .github/actions/run-with-log/README.md   | 2 +-
 .github/actions/run-with-log/action.yml  | 2 +-
 .github/workflows/test-run-with-log.yaml | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/actions/run-with-log/README.md b/.github/actions/run-with-log/README.md
index ec264f6..82533df 100644
--- a/.github/actions/run-with-log/README.md
+++ b/.github/actions/run-with-log/README.md
@@ -4,7 +4,7 @@ Run a bash script like a normal `run:` step, but capture its combined output to
 a `.log` file and append a completion marker as the final line:
 
 ```
-[==log-finish-line==] exit_code=<N>
+[==tt-log-finish-line==] exit_code=<N>
 ```
 
 The marker is a generic end-of-run sentinel. Its **absence** means the shell was
diff --git a/.github/actions/run-with-log/action.yml b/.github/actions/run-with-log/action.yml
index 3979196..4a69ad4 100644
--- a/.github/actions/run-with-log/action.yml
+++ b/.github/actions/run-with-log/action.yml
@@ -43,5 +43,5 @@ runs:
         rc=${PIPESTATUS[0]}
         rm -f "$script"
 
-        printf '[==log-finish-line==] exit_code=%s\n' "$rc" >> "$RUN_WITH_LOG_FILE"
+        printf '[==tt-log-finish-line==] exit_code=%s\n' "$rc" >> "$RUN_WITH_LOG_FILE"
         exit $rc
diff --git a/.github/workflows/test-run-with-log.yaml b/.github/workflows/test-run-with-log.yaml
index 1931e61..041fe8f 100644
--- a/.github/workflows/test-run-with-log.yaml
+++ b/.github/workflows/test-run-with-log.yaml
@@ -20,7 +20,7 @@ jobs:
       - name: Assert marker present, exit 0
         run: |
           tail -3 out/ok.log
-          tail -1 out/ok.log | grep -qx '\[==log-finish-line==\] exit_code=0'
+          tail -1 out/ok.log | grep -qx '\[==tt-log-finish-line==\] exit_code=0'
       - name: Summarize (no LLM — clean log short-circuits)
         uses: ./.github/actions/ai_summary/job
         with:
@@ -49,7 +49,7 @@ jobs:
       - name: Assert failure forwarded, marker carries exit code
         run: |
           test "${{ steps.run.outcome }}" = failure
-          tail -1 out/fail.log | grep -qx '\[==log-finish-line==\] exit_code=7'
+          tail -1 out/fail.log | grep -qx '\[==tt-log-finish-line==\] exit_code=7'
 
   timeout-case:
     runs-on: ubuntu-latest
@@ -69,7 +69,7 @@ jobs:
         run: |
           test "${{ steps.run.outcome }}" != success
           grep -q '^started$' out/timeout.log
-          if grep -q log-finish-line out/timeout.log; then
+          if grep -q tt-log-finish-line out/timeout.log; then
             echo "::error::marker present after timeout kill — bug"; exit 1
           fi
           echo "marker correctly absent (outcome=${{ steps.run.outcome }})"