From 98b3a0d40b6770aec78e32a1f3f2c3841542fb54 Mon Sep 17 00:00:00 2001
From: Vadym Mariiechko <vadimich348@gmail.com>
Date: Sun, 10 May 2026 15:00:53 +0200
Subject: [PATCH] Move agent-runtime tips out of SKILL.md into references/

Refactor the dbx-ro-query asset's operational notes so SKILL.md stays
agent-agnostic and per-runtime quirks live in
`<target_dir>/skills/dbx-ro-query/references/agent-<name>.md`. Matches
the agentskills.io references/ convention: load the file on demand
only when the agent hits a runtime-specific issue, rather than
carrying every vendor's quirks inline in the main SKILL.md.

Files added:

- `references/agent-claude-code.md`: 2-minute Bash tool default
  timeout (warehouse cold-start hint); exit-code echo pattern reframed
  for parseable rejection evidence (Claude Code's harness already
  surfaces non-zero exits to the model, so the echo is only useful for
  embedding the exit code in captured output text, not for failure
  detection).
- `references/agent-codex.md`: sandbox `network_access = true` setting
  to unblock GitHub URL fetches and Databricks calls; `login: false`
  on shell_command for clean captured output (was inline in SKILL.md);
  warning against PowerShell `Measure-Command` wrappers; warehouse
  env var note. Validated against a live Codex test session.
- `references/agent-cursor.md`: ready-to-paste
  `.cursor/rules/dbx-ro-query.mdc` rule snippet (Cursor does NOT
  auto-discover `.cursor/skills/`, only `.cursor/rules/*.mdc`);
  terminal runtime notes confirming exit codes are surfaced natively;
  warehouse warm-up note. Validated against a live Cursor 3.3.x /
  Composer 2 test session.

SKILL.md change: the runtime-specific bullet that named Codex inline
is replaced with a generic on-demand pointer to the references/
folder. Other operational notes stay generic and apply regardless of
runtime. Prevents SKILL.md from drifting toward a multi-vendor
compatibility matrix as more agents get documented.

success_message change: each per-agent section now points at its
references/agent-<name>.md file rather than restating wiring inline.
The Cursor section explicitly calls out that `.cursor/skills/` is
not auto-discovered, so users see this at install time, not after
their first failed query.

Asset README troubleshooting: documented two install-time pitfalls
surfaced by independent Codex and Cursor test runs. The first is a
generic Databricks CLI quirk: `bundle init` fails resolving a stale
DATABRICKS_CONFIG_PROFILE; workaround is to re-point the env var at
a valid profile. The second is Codex-specific: sandbox blocks the
GitHub URL fetch unless network_access = true is set.

Test updates: EXPECTED_FILES grows by three (claude-code, codex,
cursor); new tests assert SKILL.md points at references/ and every
references file starts with an H1 heading so on-demand loaders see
a clear scope title.

Test count: 2315 -> 2317. Full suite green.
---
 CHANGELOG.md                                  | 11 +++++
 assets/dbx-ro-query/README.md                 | 24 +++++++++++
 .../databricks_template_schema.json           |  2 +-
 .../skills/dbx-ro-query/SKILL.md              |  2 +-
 .../references/agent-claude-code.md           | 20 +++++++++
 .../dbx-ro-query/references/agent-codex.md    | 41 +++++++++++++++++++
 .../dbx-ro-query/references/agent-cursor.md   | 36 ++++++++++++++++
 .../dbx-ro-query/scripts/dbx-ro-query.py      |  1 -
 tests/assets/test_dbx_ro_query.py             | 23 +++++++++++
 9 files changed, 157 insertions(+), 3 deletions(-)
 create mode 100644 assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md
 create mode 100644 assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md
 create mode 100644 assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-cursor.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9e0cdd0..1927b22 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
 
 ## [Unreleased]
 
+### Added
+- **Asset `dbx-ro-query` per-agent references**: new `<target_dir>/skills/dbx-ro-query/references/` subfolder holding agent-runtime-specific operational tips. The parent `SKILL.md` points agents at these files, which they load on demand when they hit a runtime quirk; this matches the [agentskills.io](https://agentskills.io) `references/` convention.
+  - `agent-claude-code.md`: 2-minute `Bash` tool default timeout (warehouse cold-start hint) plus an exit-code-echo pattern for parseable rejection evidence.
+  - `agent-codex.md`: sandbox `network_access = true` setting; `login: false` on `shell_command` for clean captured output; warning against `Measure-Command` PowerShell wrappers; warehouse env var note. Validated by a live Codex test session against the released v1.6.0 install.
+  - `agent-cursor.md`: ready-to-paste `.cursor/rules/dbx-ro-query.mdc` rule snippet (Cursor does NOT auto-discover `.cursor/skills/`); terminal runtime notes confirming exit codes are surfaced natively. Validated by a live Cursor 3.3.x / Composer 2 test session.
+- **Asset `dbx-ro-query` README troubleshooting**: documented two install-time pitfalls surfaced by independent Codex and Cursor test runs. The first is a generic Databricks CLI quirk: `bundle init` fails resolving a stale `DATABRICKS_CONFIG_PROFILE`; workaround is to re-point the env var at a valid profile. The second is Codex-specific: sandbox blocks the GitHub URL fetch unless `network_access = true` is set.
+
+### Changed
+- **Asset `dbx-ro-query` SKILL.md operational notes**: the runtime-specific bullet that named Codex inline has been replaced with a generic on-demand pointer to the new `references/` folder. The remaining operational notes are agent-agnostic. Keeps `SKILL.md` from drifting toward a multi-vendor compatibility matrix as more agents are documented.
+- **Asset `dbx-ro-query` `success_message`**: each per-agent section now points at its `references/agent-<name>.md` file rather than restating wiring inline. The Cursor section explicitly calls out that `.cursor/skills/` is not auto-discovered; users see this at install time, not after their first failed query.
+
 ## [1.6.0] - 2026-05-09
 
 ### Added
diff --git a/assets/dbx-ro-query/README.md b/assets/dbx-ro-query/README.md
index 61b9c83..74042d2 100644
--- a/assets/dbx-ro-query/README.md
+++ b/assets/dbx-ro-query/README.md
@@ -43,6 +43,30 @@ python <target_dir>/skills/dbx-ro-query/scripts/dbx-ro-query.py \
 
 See `<target_dir>/skills/dbx-ro-query/SKILL.md` for the full argument reference, output formats, supported SQL prefixes, and the rejection list.
 
+## Troubleshooting
+
+### `databricks bundle init` fails with `has no <profile> profile configured`
+
+The CLI tries to resolve a Databricks profile during `bundle init` even though template installation does not need workspace auth. If your shell or IDE has `DATABRICKS_CONFIG_PROFILE` pointing at a profile that no longer exists in `~/.databrickscfg`, the install fails with `Error: resolve: <path>/.databrickscfg has no <profile> profile configured`.
+
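+Workaround: run the install with the env var pointed at a valid profile (or `DEFAULT`). The example below uses bash syntax; in PowerShell, run `$env:DATABRICKS_CONFIG_PROFILE = "DEFAULT"` first and then the `databricks bundle init` command without the prefix: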
+
+```bash
+DATABRICKS_CONFIG_PROFILE=DEFAULT databricks bundle init https://github.com/vmariiechko/databricks-bundle-template \
+  --template-dir assets/dbx-ro-query
+```
+
+### Codex sandbox blocks the GitHub URL fetch
+
+If `databricks bundle init <github url>` fails inside Codex with `connectex: An attempt was made to access a socket in a way forbidden by its access permissions`, the Codex sandbox is blocking outbound network. Enable network access in `~/.codex/config.toml`:
+
+```toml
+[sandbox_workspace_write]
+network_access = true
+```
+
+Then restart Codex. See `<target_dir>/skills/dbx-ro-query/references/agent-codex.md` for the full Codex runtime checklist.
+
 ## What this asset is
 
 A standalone sub-template in the [databricks-bundle-template](https://github.com/vmariiechko/databricks-bundle-template) asset library. It does not depend on the core template; it can be installed into any Databricks bundle, or any project at all that uses the Databricks CLI. See [ASSETS.md](../../ASSETS.md) for the full catalog.
diff --git a/assets/dbx-ro-query/databricks_template_schema.json b/assets/dbx-ro-query/databricks_template_schema.json
index 6d93c6c..9ad5196 100644
--- a/assets/dbx-ro-query/databricks_template_schema.json
+++ b/assets/dbx-ro-query/databricks_template_schema.json
@@ -12,7 +12,7 @@
     }
   },
 
-  "success_message": "\nSkill installed at '{{.target_dir}}/skills/dbx-ro-query/'.\n\nNext steps:\n\n1. Wire the skill into your agent. Pick the line that matches your setup:\n\n   Claude Code\n   - Auto-discovery folder is `.claude/skills/`. If you installed to `.claude` directly, you're done.\n   - Otherwise add to CLAUDE.md:\n     > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md when running read-only SQL against Databricks.\n\n   Codex\n   - Add to AGENTS.md:\n     > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md for read-only Databricks SQL. Invoke `python {{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py \"<sql>\" \"<profile>\" \"<format>\"`.\n\n   Cursor\n   - Add a rule under `.cursor/rules/` (or `.cursorrules`) referencing {{.target_dir}}/skills/dbx-ro-query/SKILL.md.\n\n   Gemini CLI\n   - Reference {{.target_dir}}/skills/dbx-ro-query/SKILL.md from your `.gemini/` configuration.\n\n   Other / multi-agent\n   - Point your agent at {{.target_dir}}/skills/dbx-ro-query/SKILL.md however your runtime expects.\n\n2. Smoke check the wrapper:\n   python {{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py \"SELECT 1\" <your-profile> scalar\n",
+  "success_message": "\nSkill installed at '{{.target_dir}}/skills/dbx-ro-query/'.\n\nNext steps:\n\n1. Wire the skill into your agent. Pick the line that matches your setup:\n\n   Claude Code\n   - Auto-discovery folder is `.claude/skills/`. If you installed to `.claude` directly, you're done.\n   - Otherwise add to CLAUDE.md:\n     > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md when running read-only SQL against Databricks.\n   - Cold-start tips: see {{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md.\n\n   Codex\n   - Add to AGENTS.md:\n     > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md for read-only Databricks SQL. Invoke `python {{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py \"<sql>\" \"<profile>\" \"<format>\"`.\n   - Sandbox network and shell quirks: see {{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md.\n\n   Cursor\n   - Cursor does NOT auto-discover `.cursor/skills/`. Drop a rule at `.cursor/rules/dbx-ro-query.mdc` (the exact snippet is in {{.target_dir}}/skills/dbx-ro-query/references/agent-cursor.md).\n\n   Gemini CLI\n   - Reference {{.target_dir}}/skills/dbx-ro-query/SKILL.md from your `.gemini/` configuration.\n\n   Other / multi-agent\n   - Point your agent at {{.target_dir}}/skills/dbx-ro-query/SKILL.md however your runtime expects.\n\n2. Smoke check the wrapper:\n   python {{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py \"SELECT 1\" <your-profile> scalar\n",
 
   "min_databricks_cli_version": "v0.296.0",
   "version": 1
diff --git a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/SKILL.md b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/SKILL.md
index 91cb2ef..f1c64fd 100644
--- a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/SKILL.md
+++ b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/SKILL.md
@@ -72,7 +72,7 @@ python .agents/skills/dbx-ro-query/scripts/dbx-ro-query.py \
 ## Operational notes
 
 - Never call `databricks bundle deploy` or `databricks bundle run` from this skill. The skill's purpose is read-only evidence; deployment is out of scope.
-- If your shell runtime has a login/profile option, disable it for invocations of this script. In Codex, set `login: false` on `shell_command`. If output contains valid TSV/results followed by `oh-my-posh`, `Terminal-Icons`, `ResourceUnavailable`, or `Export-Clixml` errors, rerun with the parent shell's no-profile / non-login option.
+- For agent-runtime-specific quirks (login chains, command timeouts, captured-output noise, exit-code visibility, sandbox auth), load the `references/agent-<name>.md` file matching your runtime (currently `agent-claude-code.md`, `agent-codex.md`, and `agent-cursor.md`). The other notes in this section are generic and apply regardless of runtime.
 - If Databricks auth, token cache, or profile access fails because of sandbox restrictions, rerun the same command with the runtime's elevated / outside-sandbox execution mechanism. Do not call raw `databricks experimental aitools tools query` to work around the wrapper.
 - Use `python3` on Unix hosts if `python` does not point to Python 3. The script is Python 3.9+, no third-party dependencies.
 - Exit code is 0 on success, non-zero on validation failure or upstream CLI error. Stderr carries the upstream error message; stdout carries the formatted result.
diff --git a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md
new file mode 100644
index 0000000..67cb682
--- /dev/null
+++ b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md
@@ -0,0 +1,20 @@
+# Claude Code runtime tips
+
+Load this file on demand when running `dbx-ro-query` inside Claude Code. Skip it for other runtimes.
+
+## Bash tool default timeout (2 minutes)
+
+The `Bash` tool defaults to a 120 000 ms (2-minute) timeout. Cold-starting a Databricks SQL warehouse can exceed this: serverless warehouses typically take 30 to 60 seconds, classic warehouses 3 to 5 minutes. If the first invocation against a stopped warehouse times out, set the `Bash` tool's `timeout` parameter higher (for example 300 000 ms) for the warm-up call, then drop back to the default for subsequent queries.
+
+## Capturing rejections as parseable evidence
+
+Claude Code's harness already surfaces non-zero exit codes to the model when a `Bash` call fails, so you do not need any extra wiring to *detect* a wrapper rejection. You only need extra wiring to *embed* the exit code in the captured output text, which is useful when logging a rejection as machine-readable evidence in a report.
+
+For evidence captures, append `; echo "exit=$?"` to the invocation (the example uses the `.agents` install path; substitute your own `target_dir`):
+
+```bash
+python .agents/skills/dbx-ro-query/scripts/dbx-ro-query.py \
+  "DROP TABLE foo" "<your-profile>" 2>&1; echo "exit=$?"
+```
+
+The captured output then ends with `exit=1` (or whatever the wrapper returned), which downstream consumers can grep without parsing the harness's structured error block.
diff --git a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md
new file mode 100644
index 0000000..47f4907
--- /dev/null
+++ b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md
@@ -0,0 +1,41 @@
+# Codex runtime tips
+
+Load this file on demand when running `dbx-ro-query` inside Codex. Skip it for other runtimes.
+
+## Sandbox network access
+
+If `databricks` calls fail with errors like `connectex: An attempt was made to access a socket in a way forbidden by its access permissions`, the Codex sandbox is blocking outbound network. Enable network access in `~/.codex/config.toml`:
+
+```toml
+[sandbox_workspace_write]
+network_access = true
+```
+
+Restart Codex after the config change. The same block can manifest as `databricks bundle init` failing against a GitHub URL or as auth/warehouse calls timing out; both clear once the sandbox is allowed to reach the network.
+
+## Disable the shell login chain for wrapper invocations
+
+If `shell_command` runs through a login shell, captured output may end up polluted with profile noise such as `oh-my-posh`, `Terminal-Icons`, `Set-PSReadLineOption`, `ResourceUnavailable`, or `Export-Clixml` errors. These appear *after* the wrapper's actual output, so the SQL result itself is correct but the captured text is hard to paste cleanly into evidence.
+
+Set `login: false` on `shell_command` for invocations of `dbx-ro-query.py`:
+
+```yaml
+shell_command:
+  login: false
+```
+
+If you see a valid TSV or scalar result followed by such errors, rerun with `login: false` (or your local equivalent) to get clean captured output.
+
+## Exit codes are already visible
+
+Codex surfaces `Exit code: N` directly in `shell_command` results to the model. You do not need to append `; echo "exit=$?"` to detect failures. Only add it when you specifically want the exit code embedded in the captured output text for parseable evidence logs.
+
+Avoid wrapping the wrapper with PowerShell `Measure-Command`. It can hide the child process exit code from Codex.
+
+## Warehouse selection
+
+The CLI auto-detects an available warehouse. For deterministic test matrices across sessions or machines, set `DATABRICKS_WAREHOUSE_ID=<id>` explicitly, either as an env var or via your shell config. First post-cold-start call typically takes 20-30 seconds; subsequent warm calls return in 2-3 seconds.
+
+## Per-command timeout
+
+Codex does not impose a fixed default; tune `timeout_ms` per call. `timeout_ms: 300000` (5 minutes) covers warehouse cold starts comfortably without making fast queries feel sluggish.
diff --git a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-cursor.md b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-cursor.md
new file mode 100644
index 0000000..e56d409
--- /dev/null
+++ b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-cursor.md
@@ -0,0 +1,36 @@
+# Cursor runtime tips
+
+Load this file on demand when running `dbx-ro-query` inside Cursor (Composer or any IDE-resident agent). Skip it for other runtimes. Verified against Cursor 3.3.x with the Composer 2 model.
+
+## Wiring: rules live under `.cursor/rules/`, not `.cursor/skills/`
+
+Cursor does not auto-discover arbitrary markdown under `.cursor/skills/`. Project-rule discovery is `.cursor/rules/*.mdc`. After installing this asset with `target_dir: .cursor`, drop a rule file at `.cursor/rules/dbx-ro-query.mdc` so Composer surfaces the skill on every prompt.
+
+Suggested content for `.cursor/rules/dbx-ro-query.mdc`:
+
+````markdown
+---
+description: Read-only Databricks SQL via the dbx-ro-query skill
+alwaysApply: true
+---
+
+When you need to query Databricks (schema discovery, row sampling, aggregations, EXPLAIN — anything that reads but must not mutate), follow the skill at `.cursor/skills/dbx-ro-query/SKILL.md`. Invoke the bundled wrapper:
+
+```
+python .cursor/skills/dbx-ro-query/scripts/dbx-ro-query.py "<sql>" "<profile>" "<format>"
+```
+
+Do not call `databricks experimental aitools tools query` directly; the wrapper enforces a read-only guard.
+````
+
+The legacy `.cursorrules` file at the project root may still work in some setups, but `.cursor/rules/` is Cursor's current documented layout. Use `alwaysApply: true` so the rule is in scope without the user having to invoke it manually.
+
+## Terminal runtime
+
+- Cursor's terminal tool surfaces stdout, stderr, and `exit_code` to the model on every command. You do not need to append `; echo "exit=$?"` to detect failures.
+- First query against a stopped or cold warehouse takes a few seconds (warm-up). Subsequent queries on a warm warehouse return in ~2-3 seconds. No observed timeout in default Composer settings.
+- Output capture is clean. No login-shell or status-decoration noise has been observed.
+
+## Profile mapping
+
+The `<profile>` argument passed to the wrapper is a Databricks CLI profile name from `~/.databrickscfg`, not the `DATABRICKS_CONFIG_PROFILE` env var. The two only intersect when the user has explicitly exported the env var. Pass the profile explicitly to keep behavior deterministic.
diff --git a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py
index 1d69874..53ae6f7 100644
--- a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py
+++ b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py
@@ -27,7 +27,6 @@
 from collections.abc import Iterable
 from typing import Any
 
-
 ALLOWED_FORMATS = ("auto", "scalar", "lines", "csv", "tsv", "json")
 
 # Only statements that start with one of these tokens are allowed through.
diff --git a/tests/assets/test_dbx_ro_query.py b/tests/assets/test_dbx_ro_query.py
index 32de5a6..4c44ac3 100644
--- a/tests/assets/test_dbx_ro_query.py
+++ b/tests/assets/test_dbx_ro_query.py
@@ -23,6 +23,9 @@
 EXPECTED_FILES = (
     "skills/dbx-ro-query/SKILL.md",
     "skills/dbx-ro-query/scripts/dbx-ro-query.py",
+    "skills/dbx-ro-query/references/agent-claude-code.md",
+    "skills/dbx-ro-query/references/agent-codex.md",
+    "skills/dbx-ro-query/references/agent-cursor.md",
 )
 
 
@@ -79,6 +82,26 @@ def test_skill_frontmatter_well_formed(installed: Path):
     assert "description:" in front, "SKILL.md frontmatter missing `description`"
 
 
+def test_skill_references_pointer_present(installed: Path):
+    """SKILL.md operational notes must point readers at the references/ folder.
+
+    The references/ subfolder holds per-agent runtime tips (loaded on demand).
+    SKILL.md needs to surface this discovery hint so agents know to look there
+    when they hit a runtime quirk."""
+    skill = installed / DEFAULT_TARGET / "skills" / "dbx-ro-query" / "SKILL.md"
+    text = skill.read_text(encoding="utf-8")
+    assert "references/" in text, "SKILL.md missing pointer to references/ folder"
+
+
+def test_references_files_have_headings(installed: Path):
+    """Every references file must start with a Markdown H1 so agents loading
+    it on demand see a clear scope title."""
+    refs_dir = installed / DEFAULT_TARGET / "skills" / "dbx-ro-query" / "references"
+    for ref in refs_dir.glob("agent-*.md"):
+        first_line = ref.read_text(encoding="utf-8").splitlines()[0]
+        assert first_line.startswith("# "), f"{ref.name} missing H1 heading on first line"
+
+
 @pytest.mark.parametrize(
     "sql",
     [