tenstorrent · ppetrovicTT · Jun 11, 2026 · Jun 17, 2026
@@ -55,6 +55,17 @@ The action takes inline JSON — no separate config file. Required fields:
   `input_dirs`/`output_dir` pass through unchanged.
 - `input_dirs` — directories with `.log` files to analyze.
 - `output_dir` — where to write the per-job summary.
+- `log_complete_marker` (optional override) — regex for the completion
+  marker the caller's test wrapper appends as the final log line. Bundled
+  default matches `[==tt-log-finish-line==]`, with an optional
+  `exit_code=N` payload (group 1). Marker absent means the step's shell was
+  hard-killed — the GitHub `timeout-minutes` kill, invisible in the log
+  itself — so a clean-looking log classifies as TIMEOUT instead of a false
+  SUCCESS. When the log already shows a crash/failure, that status wins and
+  the truncation is reported as a possibly independent issue
+  (`_job.log_complete: false` in the JSON). Callers whose wrapper does not
+  write the marker must pass `"log_complete_marker": null`, otherwise every
+  passing job misclassifies as TIMEOUT.
 
 Categories, layers, and analysis patterns come from the bundled
 `analysis.yaml` shipped with the tool.

@@ -105,6 +105,7 @@ def _build_json(
                 "exit_code": extracted_log.exit_code,
                 "has_crash": extracted_log.has_crash,
                 "has_timeout": extracted_log.has_timeout,
+                "log_complete": extracted_log.log_complete,
                 "failed_tests": extracted_log.failed_tests,
             }
         )
@@ -176,6 +177,7 @@ def main():
     test_patterns = {
         "test_result_patterns": config.get("test_patterns", []),
         "failed_test_patterns": config.get("failed_test_patterns", []),
+        "log_complete_marker": config.get("log_complete_marker"),
     }
 
     # workspace anchors relative paths in input_dirs/output_dir; absolute
@@ -246,6 +248,12 @@ def _timeout_handler(signum, frame):
         job_status = get_job_status(extracted)
         print(f"Status: {job_status.status_text}", file=sys.stderr)
 
+        # "Would be SUCCESS if not for the absent completion marker." Lets the
+        # missing-dir infra paths below win over a marker-absence TIMEOUT.
+        clean_apart_from_marker = job_status.is_success or (
+            job_status.status_text == "TIMEOUT" and extracted.log_complete is False
+        )
+
         # Success + no missing dirs → done
         if job_status.is_success and not is_infra_failure:
             summary = FailureSummary()
@@ -272,8 +280,41 @@ def _timeout_handler(signum, frame):
             )
             return
 
+        # Clean log, marker absent, all dirs present → GitHub timeout-minutes
+        # kill. Authoritative; no LLM needed.
+        if (
+            job_status.status_text == "TIMEOUT"
+            and extracted.log_complete is False
+            and not is_infra_failure
+            and not extracted.error_sections
+        ):
+            summary = FailureSummary()
+            summary.category = "infra:timeout"
+            context = CIContext()
+            md = format_summary_markdown(
+                summary,
+                context,
+                job_status,
+                extracted_log=extracted,
+                job_name=job_name,
+                job_url=job_url,
+            )
+            _write_outputs(
+                output_dir,
+                job_id,
+                md,
+                _build_json(
+                    summary,
+                    job_status,
+                    extracted,
+                    job_name=job_name,
+                    job_url=job_url,
+                ),
+            )
+            return
+
         # Partial dirs + no errors → INFRA_FAILURE without LLM
-        if job_status.is_success and is_infra_failure and not extracted.error_sections:
+        if clean_apart_from_marker and is_infra_failure and not extracted.error_sections:
             summary = FailureSummary()
             summary.category = "infra:partial_logs"
             summary.root_cause = f"Missing log dirs: {missing_dirs}"

@@ -79,6 +79,7 @@ def load_config(project: dict | None = None) -> dict:
     config.setdefault("categories", {})
     config.setdefault("test_patterns", [])
     config.setdefault("failed_test_patterns", [])
+    config.setdefault("log_complete_marker", None)
     config.setdefault("detection_patterns", {})
     config.setdefault("repos", {"default_branches": ["main", "master", "dev"]})
 

@@ -3,6 +3,14 @@
 # Consumers extend via analysis_config: in their workflow YAML, and pick
 # the LLM model via the per-project config file's `model:` field.
 
+# Regex for the completion marker the caller's wrapper appends as the final
+# log line, e.g. '[==tt-log-finish-line==] exit_code=0'. Token presence is the
+# contract; the exit_code payload (group 1) is optional. Marker absent =>
+# shell was hard-killed (GitHub timeout-minutes) => TIMEOUT instead of false
+# SUCCESS. Callers whose wrapper does not write the marker must override
+# with null in their per-project config.
+log_complete_marker: '^\[==tt-log-finish-line==\]\s*(?:exit_code=(\d+))?'
+
 # Regexes scanned over the full log to set has_crash / has_timeout. Project
 # configs extend each group additively. Case-sensitivity is per group (see
 # the tool): crash/timeout match case-insensitively; python/killed do not.

@@ -89,6 +89,8 @@ class ExtractedLog:
     exit_code: int | None = None
     has_crash: bool = False  # TT_FATAL, panic, etc.
     has_timeout: bool = False
+    # None = marker not configured; False = marker absent (shell hard-killed)
+    log_complete: bool | None = None
     failed_tests: list[str] = field(default_factory=list)  # Test failures (features missing)
     failed_evals: list[str] = field(default_factory=list)  # Eval failures (accuracy below target)
 
@@ -183,6 +185,11 @@ def get_job_status(extracted_log: ExtractedLog) -> JobStatus:
         return JobStatus(False, "ORANGE", f"TESTS FAILED ({len(extracted_log.failed_tests)} failed)")
     if extracted_log.failed_evals:
         return JobStatus(False, "YELLOW", f"EVALS BELOW TARGET ({len(extracted_log.failed_evals)} failed)")
+    # Marker absent on an otherwise clean log = GitHub timeout-minutes kill.
+    # Crash/failure branches above win: truncation is then reported as a
+    # possibly separate issue, not as the status.
+    if extracted_log.log_complete is False:
+        return JobStatus(False, "RED", "TIMEOUT")
     return JobStatus(True, "GREEN", "SUCCESS")
 
 
@@ -414,6 +421,19 @@ def _any(groups: list[str], flags=0) -> bool:
     if match := re.search(r"Process completed with exit code (\d+)", full_text):
         result.exit_code = int(match.group(1))
 
+    # Completion marker: the caller's wrapper appends it as the log's final
+    # line; absence means the shell was hard-killed (GitHub timeout-minutes),
+    # which leaves no trace in the tee'd log itself.
+    marker_pattern = (test_patterns or {}).get("log_complete_marker")
+    if marker_pattern:
+        if match := re.search(marker_pattern, full_text, re.MULTILINE):
+            result.log_complete = True
+            # group 1 (optional) = wrapped command's exit code
+            if match.groups() and match.group(1) is not None and match.group(1).isdigit():
+                result.exit_code = int(match.group(1))
+        else:
+            result.log_complete = False
+
     # Use provided test patterns or default to empty
     if test_patterns is None:
         test_patterns = {}
@@ -920,6 +940,17 @@ def format_extracted_log(extracted: ExtractedLog) -> str:
                 for name, cfg in list(lc.framework.items())[:5]:
                     parts.append(f"  {name}: {cfg.value}")
 
+    if extracted.log_complete is False:
+        parts.append("\n" + "=" * 60)
+        parts.append("INCOMPLETE LOG")
+        parts.append("=" * 60)
+        parts.append(
+            "The log ended without its completion marker: the step was killed "
+            "by the GitHub timeout-minutes limit (test ran too long or hung). "
+            "Any errors below may be a separate, independent issue — do not "
+            "assume they caused the timeout."
+        )
+
     parts.append("\n" + "=" * 60)
     parts.append(f"ERROR SECTIONS ({len(extracted.error_sections)} found)")
     parts.append("=" * 60)

@@ -386,6 +386,12 @@ def format_summary_markdown(
     else:
         md = f"### {_emoji(job_status.status_code)} {job_status.status_text}\n"
 
+    if extracted_log and extracted_log.log_complete is False:
+        if job_status.status_text == "TIMEOUT":
+            md += "⚠️ Log ended without its completion marker — step killed by GitHub `timeout-minutes` (test ran too long or hung).\n"
+        else:
+            md += "⚠️ Log is incomplete — step killed by GitHub `timeout-minutes` after the failure below; possibly an independent hang in a later test.\n"
+
     # Show problematic layer if different from error layer
     layer_info = f"| **Error Layer** | `{summary.layer}` |"
     if summary.problematic_layer and summary.problematic_layer != summary.layer:

@@ -120,14 +120,14 @@ def test_empty(self):
 
 class TestResolveLogDirs:
     def test_all_present(self, tmp_path):
-        _write_log(tmp_path / "run_logs", "run.log", "some log content\n")
+        _write_log(tmp_path / "run_logs", "run.log", "some log content\n[==tt-log-finish-line==] exit_code=0\n")
         _write_log(tmp_path / "docker_server", "server.log", "server log\n")
         present, missing = _resolve_log_dirs(["run_logs", "docker_server"], tmp_path)
         assert len(present) == 2
         assert missing == []
 
     def test_one_missing(self, tmp_path):
-        _write_log(tmp_path / "run_logs", "run.log", "some log content\n")
+        _write_log(tmp_path / "run_logs", "run.log", "some log content\n[==tt-log-finish-line==] exit_code=0\n")
         present, missing = _resolve_log_dirs(["run_logs", "docker_server"], tmp_path)
         assert len(present) == 1
         assert missing == ["docker_server"]
@@ -145,7 +145,7 @@ def test_all_missing(self, tmp_path):
 
     def test_absolute_log_dirs_ignore_base(self, tmp_path):
         # Pathlib: Path(base) / Path(absolute) returns the absolute.
-        _write_log(tmp_path / "logs", "run.log", "ok\n")
+        _write_log(tmp_path / "logs", "run.log", "ok\n[==tt-log-finish-line==] exit_code=0\n")
         present, missing = _resolve_log_dirs([str(tmp_path / "logs")], Path("/nonexistent"))
         assert len(present) == 1
         assert missing == []
@@ -215,7 +215,7 @@ class TestWorkspace:
     """workspace field anchors relative input_dirs and output_dir."""
 
     def test_relative_input_dirs_resolve_against_workspace(self, tmp_path):
-        _write_log(tmp_path / "logs", "run.log", "INFO: ok\n")
+        _write_log(tmp_path / "logs", "run.log", "INFO: ok\n[==tt-log-finish-line==] exit_code=0\n")
         config = json.dumps(
             {
                 "model": "test",
@@ -228,7 +228,7 @@ def test_relative_input_dirs_resolve_against_workspace(self, tmp_path):
         assert any(tmp_path.glob("ai_job_summary*.md"))
 
     def test_relative_output_dir_resolves_against_workspace(self, tmp_path):
-        _write_log(tmp_path / "logs", "run.log", "INFO: ok\n")
+        _write_log(tmp_path / "logs", "run.log", "INFO: ok\n[==tt-log-finish-line==] exit_code=0\n")
         config = json.dumps(
             {
                 "model": "test",
@@ -245,7 +245,7 @@ def test_env_vars_in_paths_are_expanded(self, tmp_path, monkeypatch):
         # This is the only way to express the in-container workspace path
         # for container jobs (the ${{ github.workspace }} expression
         # always resolves to the host path).
-        _write_log(tmp_path / "logs", "run.log", "INFO: ok\n")
+        _write_log(tmp_path / "logs", "run.log", "INFO: ok\n[==tt-log-finish-line==] exit_code=0\n")
         monkeypatch.setenv("GITHUB_WORKSPACE", str(tmp_path))
         config = json.dumps(
             {
@@ -266,15 +266,15 @@ class TestDualOutput:
     """Tool always writes both .md and .json."""
 
     def test_writes_both_files(self, tmp_path):
-        _write_log(tmp_path / "run_logs", "run.log", "INFO: all good\n")
+        _write_log(tmp_path / "run_logs", "run.log", "INFO: all good\n[==tt-log-finish-line==] exit_code=0\n")
         _write_log(tmp_path / "docker_server", "server.log", "INFO: server ready\n")
         config = _config_json(tmp_path, ["run_logs", "docker_server"])
         _run_cli(["--config", config])
         assert any(tmp_path.glob("*.md"))
         assert any(tmp_path.glob("*.json"))
 
     def test_json_has_job_block(self, tmp_path):
-        _write_log(tmp_path / "run_logs", "run.log", "INFO: all good\n")
+        _write_log(tmp_path / "run_logs", "run.log", "INFO: all good\n[==tt-log-finish-line==] exit_code=0\n")
         _write_log(tmp_path / "docker_server", "server.log", "INFO: server ready\n")
         config = _config_json(tmp_path, ["run_logs", "docker_server"])
         _run_cli(["--config", config])
@@ -283,7 +283,7 @@ def test_json_has_job_block(self, tmp_path):
         assert "status" in data["_job"]
 
     def test_filename_uses_job_id_from_url(self, tmp_path):
-        _write_log(tmp_path / "run_logs", "run.log", "INFO: all good\n")
+        _write_log(tmp_path / "run_logs", "run.log", "INFO: all good\n[==tt-log-finish-line==] exit_code=0\n")
         _write_log(tmp_path / "docker_server", "server.log", "INFO: server ready\n")
         config = _config_json(tmp_path, ["run_logs", "docker_server"])
         _run_cli(["--config", config, "--job-url", "https://github.com/org/repo/actions/runs/1/job/99999"])
@@ -292,7 +292,7 @@ def test_filename_uses_job_id_from_url(self, tmp_path):
 
     def test_fallback_filename_without_job_url(self, tmp_path):
         """When no --job-url, files named ai_job_summary.md/.json (no ID suffix)."""
-        _write_log(tmp_path / "run_logs", "run.log", "INFO: all good\n")
+        _write_log(tmp_path / "run_logs", "run.log", "INFO: all good\n[==tt-log-finish-line==] exit_code=0\n")
         _write_log(tmp_path / "docker_server", "server.log", "INFO: server ready\n")
         config = _config_json(tmp_path, ["run_logs", "docker_server"])
         _run_cli(["--config", config])
@@ -517,6 +517,7 @@ def success_dir(self, tmp_path):
             2026-03-15 09:00:01,000 - server.py:50 - INFO: Server started on port 8000
             2026-03-15 09:01:30,500 - run_workflows.py:101 - INFO: workflow: benchmarks completed with return code: 0
             2026-03-15 09:02:46,000 - run.py:550 - INFO: All workflows completed successfully
+            [==tt-log-finish-line==] exit_code=0
         """
             ),
         )
@@ -542,6 +543,49 @@ def test_job_name_in_json(self, success_dir):
         assert data["_job"]["name"] == "Llama-3.1-8B (n150)"
 
 
+class TestTimeoutMarkerAbsent:
+    """Clean log without the completion marker → GitHub timeout-minutes kill."""
+
+    def test_clean_truncated_log_is_timeout_without_llm(self, tmp_path):
+        _write_log(tmp_path / "logs", "run.log", "INFO: running test_a\nINFO: running test_b\n")
+        config = _config_json(tmp_path, ["logs"])
+        with patch("ai_job_summary.cli.get_llm_client") as mock_llm:
+            _run_cli(["--config", config])
+            mock_llm.assert_not_called()
+        data = json.loads(_read_summary(tmp_path, ".json"))
+        assert data["_job"]["status"] == "TIMEOUT"
+        assert data["_job"]["log_complete"] is False
+
+    def test_marker_present_clean_log_is_success(self, tmp_path):
+        _write_log(tmp_path / "logs", "run.log", "INFO: ok\n[==tt-log-finish-line==] exit_code=0\n")
+        config = _config_json(tmp_path, ["logs"])
+        with patch("ai_job_summary.cli.get_llm_client") as mock_llm:
+            _run_cli(["--config", config])
+            mock_llm.assert_not_called()
+        data = json.loads(_read_summary(tmp_path, ".json"))
+        assert data["_job"]["status"] == "SUCCESS"
+
+    def test_missing_dir_wins_over_absent_marker(self, tmp_path):
+        # present log markerless + a dir missing → INFRA partial_logs, not TIMEOUT
+        _write_log(tmp_path / "run_logs", "run.log", "INFO: some log content\n")
+        config = _config_json(tmp_path, ["run_logs", "docker_server"])
+        with patch("ai_job_summary.cli.get_llm_client") as mock_llm:
+            _run_cli(["--config", config])
+            mock_llm.assert_not_called()
+        data = json.loads(_read_summary(tmp_path, ".json"))
+        assert data["_job"]["status"] == "INFRA FAILURE"
+        assert data["category"] == "infra:partial_logs"
+
+    def test_marker_disabled_keeps_clean_log_success(self, tmp_path):
+        _write_log(tmp_path / "logs", "run.log", "INFO: ok\n")
+        config = _config_json(tmp_path, ["logs"], log_complete_marker=None)
+        with patch("ai_job_summary.cli.get_llm_client") as mock_llm:
+            _run_cli(["--config", config])
+            mock_llm.assert_not_called()
+        data = json.loads(_read_summary(tmp_path, ".json"))
+        assert data["_job"]["status"] == "SUCCESS"
+
+
 # ── Crash: LLM overrides extraction (using real fixture + mock response) ──────