From 06a5a1e6a5a8d8a5c48232b2ecf8aa0205145936 Mon Sep 17 00:00:00 2001
From: Pavle Petrovic
Date: Tue, 16 Jun 2026 16:54:51 +0200
Subject: [PATCH 1/4] actions: add run-with-log composite action

Wraps a run step to tee output to a .log and append a completion marker, so a
GitHub timeout-minutes kill (marker absent) is distinguishable from a crash.
Replaces the tee/PIPESTATUS block copied across test workflows.

The marker always starts on its own line, even when the script's output does
not end with a newline.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 .github/actions/run-with-log/README.md  | 48 ++++++++++++++++++++++
 .github/actions/run-with-log/action.yml | 53 +++++++++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 .github/actions/run-with-log/README.md
 create mode 100644 .github/actions/run-with-log/action.yml

diff --git a/.github/actions/run-with-log/README.md b/.github/actions/run-with-log/README.md
new file mode 100644
index 0000000..ec264f6
--- /dev/null
+++ b/.github/actions/run-with-log/README.md
@@ -0,0 +1,48 @@
+# `run-with-log`
+
+Run a bash script like a normal `run:` step, but capture its combined output to
+a `.log` file and append a completion marker as the final line:
+
+```
+[==log-finish-line==] exit_code=<rc>
+```
+
+The marker is a generic end-of-run sentinel. Its **absence** means the shell was
+killed before finishing — most usefully a GitHub `timeout-minutes` kill, which
+leaves no trace in the log otherwise. Tooling that reads the logs (e.g. the
+`ai_summary` parser) uses this to tell a timeout apart from a crash.
+
+Replaces hand-rolled `tee` / `PIPESTATUS` blocks copied across workflows.
+
+## Usage
+
+```yaml
+- name: ${{ matrix.test-group.name }}
+  timeout-minutes: ${{ matrix.test-group.timeout }}
+  uses: tenstorrent/tt-github-actions/.github/actions/run-with-log@main
+  with:
+    log-file: generated/test_logs/${{ matrix.test-group.name }}.log
+    run: |
+      pytest models/demos/... -xv
+      ./some_other_command
+```
+
+## Inputs
+
+| Name | Required | Default | Description |
+|------|----------|---------|-------------|
+| `run` | yes | — | Bash script, exactly as a `run:` step body. |
+| `log-file` | yes | — | Path for the `.log`, relative to `working-directory`. Parent dirs are created. |
+| `working-directory` | no | `/work` | Defaults to the Tenstorrent container workdir; override per call as on a `run:` step. Needed because composite steps don't inherit the job's `defaults.run.working-directory`. |
+
+## What propagates
+
+- **`if:`, `env:`, `timeout-minutes`** on the calling step — native; `timeout-minutes`
+  is what makes the marker meaningful (the kill skips it).
+- **Exit code / pass-fail** — the script's real exit code is forwarded, so the
+  step fails when the script fails and `failure()` / `if:` downstream work.
+- **The script itself** runs under `bash -eo pipefail` (same as a `run:` step).
+
+**Does not propagate:** named `$GITHUB_OUTPUT` written inside the script — a
+composite action can only expose declared outputs, not arbitrary ones. Steps
+that must set outputs should stay plain `run:` steps.
diff --git a/.github/actions/run-with-log/action.yml b/.github/actions/run-with-log/action.yml
new file mode 100644
index 0000000..3979196
--- /dev/null
+++ b/.github/actions/run-with-log/action.yml
@@ -0,0 +1,53 @@
+name: Run with log
+description: >-
+  Run a bash script like a normal `run:` step, tee its combined output to a
+  .log file, and append a completion marker as the final line. The marker is a
+  generic end-of-run sentinel: its absence means the shell was killed before
+  finishing (e.g. GitHub timeout-minutes), which downstream tooling can detect.
+
+inputs:
+  run:
+    description: "Bash script to run, exactly as a `run:` step body (multi-line)."
+    required: true
+  log-file:
+    description: "Path for the .log, relative to working-directory. Parent dirs are created."
+    required: true
+  working-directory:
+    description: >-
+      Directory to run in. Defaults to /work (Tenstorrent container jobs).
+      Override per call as you would on a run: step. Required because composite
+      steps do not inherit the job's defaults.run.working-directory.
+    required: false
+    default: "/work"
+
+runs:
+  using: composite
+  steps:
+    - shell: bash
+      working-directory: ${{ inputs.working-directory }}
+      env:
+        # Via env so the script's quotes/specials reach the file verbatim.
+        RUN_WITH_LOG_SCRIPT: ${{ inputs.run }}
+        RUN_WITH_LOG_FILE: ${{ inputs.log-file }}
+      run: |
+        # errexit off for the wrapper so the marker is written even when the
+        # script fails — a failed run must stay a failure, not look like a kill.
+        set +e
+        mkdir -p "$(dirname "$RUN_WITH_LOG_FILE")"
+
+        # Run as its own bash file, matching a normal run step (errexit +
+        # pipefail), so the script keeps full fidelity and its own exit code.
+        script=$(mktemp)
+        printf '%s\n' "$RUN_WITH_LOG_SCRIPT" > "$script"
+        bash --noprofile --norc -eo pipefail "$script" 2>&1 | tee "$RUN_WITH_LOG_FILE"
+        rc=${PIPESTATUS[0]}
+        rm -f "$script"
+
+        # Start the marker on its own line even when the script's output did
+        # not end with a newline (printf, progress bars); otherwise it fuses
+        # with the last output line and line-based marker detection misses it.
+        if [ -s "$RUN_WITH_LOG_FILE" ] && [ -n "$(tail -c 1 "$RUN_WITH_LOG_FILE")" ]; then
+          printf '\n' >> "$RUN_WITH_LOG_FILE"
+        fi
+        printf '[==log-finish-line==] exit_code=%s\n' "$rc" >> "$RUN_WITH_LOG_FILE"
+        exit $rc

From f03eb5b1fbf7096472ce306f9265d81f55a54ae1 Mon Sep 17 00:00:00 2001
From: Pavle Petrovic
Date: Tue, 16 Jun 2026 17:02:43 +0200
Subject: [PATCH 2/4] test: add dispatch smoke test for run-with-log

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 .github/workflows/test-run-with-log.yaml | 64 ++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 .github/workflows/test-run-with-log.yaml

diff --git a/.github/workflows/test-run-with-log.yaml b/.github/workflows/test-run-with-log.yaml
new file mode 100644
index 0000000..e7d0582
--- /dev/null
+++ b/.github/workflows/test-run-with-log.yaml
@@ -0,0 +1,64 @@
+name: "test: run-with-log"
+
+# Smoke test for the run-with-log action: a completed run writes the marker;
+# a timeout-minutes kill does not. Dispatch-only (the timeout case waits ~60s).
+on:
+  workflow_dispatch:
+
+jobs:
+  success-case:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/run-with-log
+        with:
+          working-directory: ${{ github.workspace }}
+          log-file: out/ok.log
+          run: |
+            echo hello
+            printf 'world' # no trailing newline: marker must still be its own line
+      - name: Assert marker present, exit 0
+        run: |
+          tail -3 out/ok.log
+          tail -1 out/ok.log | grep -qx '\[==log-finish-line==\] exit_code=0'
+
+  failure-case:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/run-with-log
+        id: run
+        continue-on-error: true
+        with:
+          working-directory: ${{ github.workspace }}
+          log-file: out/fail.log
+          run: |
+            echo before
+            exit 7
+      - name: Assert failure forwarded, marker carries exit code
+        run: |
+          test "${{ steps.run.outcome }}" = failure
+          tail -1 out/fail.log | grep -qx '\[==log-finish-line==\] exit_code=7'
+
+  timeout-case:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/run-with-log
+        id: run
+        continue-on-error: true
+        timeout-minutes: 1
+        with:
+          working-directory: ${{ github.workspace }}
+          log-file: out/timeout.log
+          run: |
+            echo started
+            sleep 120
+      - name: Assert killed, marker absent
+        run: |
+          test "${{ steps.run.outcome }}" != success
+          grep -q '^started$' out/timeout.log
+          if grep -q log-finish-line out/timeout.log; then
+            echo "::error::marker present after timeout kill — bug"; exit 1
+          fi
+          echo "marker correctly absent (outcome=${{ steps.run.outcome }})"

From 9bed8b2c148a52115b8e31d46046c9e58c8460a5 Mon Sep 17 00:00:00 2001
From: Pavle Petrovic
Date: Wed, 17 Jun 2026 07:47:30 +0200
Subject: [PATCH 3/4] test: assert run-with-log SUCCESS/TIMEOUT classification
 end-to-end

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 .github/workflows/test-run-with-log.yaml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/.github/workflows/test-run-with-log.yaml b/.github/workflows/test-run-with-log.yaml
index e7d0582..1931e61 100644
--- a/.github/workflows/test-run-with-log.yaml
+++ b/.github/workflows/test-run-with-log.yaml
@@ -21,6 +21,17 @@ jobs:
         run: |
           tail -3 out/ok.log
           tail -1 out/ok.log | grep -qx '\[==log-finish-line==\] exit_code=0'
+      - name: Summarize (no LLM — clean log short-circuits)
+        uses: ./.github/actions/ai_summary/job
+        with:
+          config: '{"model":"none","workspace":"${{ github.workspace }}","input_dirs":["out"],"output_dir":"out/summaries"}'
+          api-key: ""
+          api-url: ""
+          job-name: smoke-success
+      - name: Assert classified SUCCESS
+        run: |
+          f=$(ls out/summaries/*.json); cat "${f%.json}.md"
+          python3 -c "import json; s=json.load(open('$f'))['_job']['status']; assert s=='SUCCESS', s"
 
   failure-case:
     runs-on: ubuntu-latest
@@ -62,3 +73,14 @@ jobs:
             echo "::error::marker present after timeout kill — bug"; exit 1
           fi
           echo "marker correctly absent (outcome=${{ steps.run.outcome }})"
+      - name: Summarize (no LLM — marker-absent short-circuits to TIMEOUT)
+        uses: ./.github/actions/ai_summary/job
+        with:
+          config: '{"model":"none","workspace":"${{ github.workspace }}","input_dirs":["out"],"output_dir":"out/summaries"}'
+          api-key: ""
+          api-url: ""
+          job-name: smoke-timeout
+      - name: Assert classified TIMEOUT
+        run: |
+          f=$(ls out/summaries/*.json); cat "${f%.json}.md"
+          python3 -c "import json; s=json.load(open('$f'))['_job']['status']; assert s=='TIMEOUT', s"

From c2ab7869cc6e4fcb7ea7eaad7017d29e2aff1845 Mon Sep 17 00:00:00 2001
From: Pavle Petrovic
Date: Wed, 17 Jun 2026 14:38:34 +0200
Subject: [PATCH 4/4] run-with-log: namespace marker token as tt-log-finish-line

---
 .github/actions/run-with-log/README.md   | 2 +-
 .github/actions/run-with-log/action.yml  | 2 +-
 .github/workflows/test-run-with-log.yaml | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/actions/run-with-log/README.md b/.github/actions/run-with-log/README.md
index ec264f6..82533df 100644
--- a/.github/actions/run-with-log/README.md
+++ b/.github/actions/run-with-log/README.md
@@ -4,7 +4,7 @@ Run a bash script like a normal `run:` step, but capture its combined output to
 a `.log` file and append a completion marker as the final line:
 
 ```
-[==log-finish-line==] exit_code=<rc>
+[==tt-log-finish-line==] exit_code=<rc>
 ```
 
 The marker is a generic end-of-run sentinel. Its **absence** means the shell was
diff --git a/.github/actions/run-with-log/action.yml b/.github/actions/run-with-log/action.yml
index 3979196..4a69ad4 100644
--- a/.github/actions/run-with-log/action.yml
+++ b/.github/actions/run-with-log/action.yml
@@ -49,5 +49,5 @@ runs:
         if [ -s "$RUN_WITH_LOG_FILE" ] && [ -n "$(tail -c 1 "$RUN_WITH_LOG_FILE")" ]; then
           printf '\n' >> "$RUN_WITH_LOG_FILE"
         fi
-        printf '[==log-finish-line==] exit_code=%s\n' "$rc" >> "$RUN_WITH_LOG_FILE"
+        printf '[==tt-log-finish-line==] exit_code=%s\n' "$rc" >> "$RUN_WITH_LOG_FILE"
         exit $rc
diff --git a/.github/workflows/test-run-with-log.yaml b/.github/workflows/test-run-with-log.yaml
index 1931e61..041fe8f 100644
--- a/.github/workflows/test-run-with-log.yaml
+++ b/.github/workflows/test-run-with-log.yaml
@@ -20,7 +20,7 @@ jobs:
       - name: Assert marker present, exit 0
         run: |
           tail -3 out/ok.log
-          tail -1 out/ok.log | grep -qx '\[==log-finish-line==\] exit_code=0'
+          tail -1 out/ok.log | grep -qx '\[==tt-log-finish-line==\] exit_code=0'
       - name: Summarize (no LLM — clean log short-circuits)
         uses: ./.github/actions/ai_summary/job
         with:
@@ -49,7 +49,7 @@ jobs:
       - name: Assert failure forwarded, marker carries exit code
         run: |
           test "${{ steps.run.outcome }}" = failure
-          tail -1 out/fail.log | grep -qx '\[==log-finish-line==\] exit_code=7'
+          tail -1 out/fail.log | grep -qx '\[==tt-log-finish-line==\] exit_code=7'
 
   timeout-case:
     runs-on: ubuntu-latest
@@ -69,7 +69,7 @@ jobs:
         run: |
           test "${{ steps.run.outcome }}" != success
           grep -q '^started$' out/timeout.log
-          if grep -q log-finish-line out/timeout.log; then
+          if grep -q tt-log-finish-line out/timeout.log; then
             echo "::error::marker present after timeout kill — bug"; exit 1
           fi
           echo "marker correctly absent (outcome=${{ steps.run.outcome }})"