From 98b3a0d40b6770aec78e32a1f3f2c3841542fb54 Mon Sep 17 00:00:00 2001 From: Vadym Mariiechko Date: Sun, 10 May 2026 15:00:53 +0200 Subject: [PATCH] Move agent-runtime tips out of SKILL.md into references/ Refactor the dbx-ro-query asset's operational notes so SKILL.md stays agent-agnostic and per-runtime quirks live in `<target_dir>/skills/dbx-ro-query/references/agent-<runtime>.md`. Matches the agentskills.io references/ convention: load the file on demand only when the agent hits a runtime-specific issue, rather than carrying every vendor's quirks inline in the main SKILL.md. Files added: - `references/agent-claude-code.md`: 2-minute Bash tool default timeout (warehouse cold-start hint); exit-code echo pattern reframed for parseable rejection evidence (Claude Code's harness already surfaces non-zero exits to the model, so the echo is only useful for embedding the exit code in captured output text, not for failure detection). - `references/agent-codex.md`: sandbox `network_access = true` setting to unblock GitHub URL fetches and Databricks calls; `login: false` on shell_command for clean captured output (was inline in SKILL.md); warning against PowerShell `Measure-Command` wrappers; warehouse env var note. Validated against a live Codex test session. - `references/agent-cursor.md`: ready-to-paste `.cursor/rules/dbx-ro-query.mdc` rule snippet (Cursor does NOT auto-discover `.cursor/skills/`, only `.cursor/rules/*.mdc`); terminal runtime notes confirming exit codes are surfaced natively; warehouse warm-up note. Validated against a live Cursor 3.3.x / Composer 2 test session. SKILL.md change: the runtime-specific bullet that named Codex inline is replaced with a generic on-demand pointer to the references/ folder. Other operational notes stay generic and apply regardless of runtime. Prevents SKILL.md from drifting toward a multi-vendor compatibility matrix as more agents get documented. 
success_message change: each per-agent section now points at its references/agent-<runtime>.md file rather than restating wiring inline. The Cursor section explicitly calls out that `.cursor/skills/` is not auto-discovered, so users see this at install time, not after their first failed query. Asset README troubleshooting: documented two install-time pitfalls surfaced by independent Codex and Cursor test runs. The first is a generic Databricks CLI quirk: `bundle init` fails resolving a stale DATABRICKS_CONFIG_PROFILE; workaround is to re-point the env var at a valid profile. The second is Codex-specific: sandbox blocks the GitHub URL fetch unless network_access = true is set. Test updates: EXPECTED_FILES grows by three (claude-code, codex, cursor); new tests assert SKILL.md points at references/ and every references file starts with an H1 heading so on-demand loaders see a clear scope title. Test count: 2315 -> 2317. Full suite green. --- CHANGELOG.md | 11 +++++ assets/dbx-ro-query/README.md | 24 +++++++++++ .../databricks_template_schema.json | 2 +- .../skills/dbx-ro-query/SKILL.md | 2 +- .../references/agent-claude-code.md | 20 +++++++++ .../dbx-ro-query/references/agent-codex.md | 41 +++++++++++++++++++ .../dbx-ro-query/references/agent-cursor.md | 36 ++++++++++++++++ .../dbx-ro-query/scripts/dbx-ro-query.py | 1 - tests/assets/test_dbx_ro_query.py | 23 +++++++++++ 9 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md create mode 100644 assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md create mode 100644 assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-cursor.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e0cdd0..1927b22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/). 
## [Unreleased] +### Added +- **Asset `dbx-ro-query` per-agent references**: new `<target_dir>/skills/dbx-ro-query/references/` subfolder holding agent-runtime-specific operational tips. Files are loaded on demand, via a pointer in the parent `SKILL.md`, when an agent hits a runtime quirk; this matches the [agentskills.io](https://agentskills.io) `references/` convention. + - `agent-claude-code.md`: 2-minute `Bash` tool default timeout (warehouse cold-start hint) plus an exit-code-echo pattern for parseable rejection evidence. + - `agent-codex.md`: sandbox `network_access = true` setting; `login: false` on `shell_command` for clean captured output; warning against `Measure-Command` PowerShell wrappers; warehouse env var note. Validated by a live Codex test session against the released v1.6.0 install. + - `agent-cursor.md`: ready-to-paste `.cursor/rules/dbx-ro-query.mdc` rule snippet (Cursor does NOT auto-discover `.cursor/skills/`); terminal runtime notes confirming exit codes are surfaced natively. Validated by a live Cursor 3.3.x / Composer 2 test session. +- **Asset `dbx-ro-query` README troubleshooting**: documented two install-time pitfalls surfaced by independent Codex and Cursor test runs. The first is a generic Databricks CLI quirk: `bundle init` fails resolving a stale `DATABRICKS_CONFIG_PROFILE`; workaround is to re-point the env var at a valid profile. The second is Codex-specific: sandbox blocks the GitHub URL fetch unless `network_access = true` is set. + +### Changed +- **Asset `dbx-ro-query` SKILL.md operational notes**: the runtime-specific bullet that named Codex inline has been replaced with a generic on-demand pointer to the new `references/` folder. The remaining operational notes are agent-agnostic. This keeps `SKILL.md` from drifting toward a multi-vendor compatibility matrix as more agents are documented. +- **Asset `dbx-ro-query` `success_message`**: each per-agent section now points at its `references/agent-<runtime>.md` file rather than restating wiring inline. 
The Cursor section explicitly calls out that `.cursor/skills/` is not auto-discovered; users see this at install time, not after their first failed query. + ## [1.6.0] - 2026-05-09 ### Added diff --git a/assets/dbx-ro-query/README.md b/assets/dbx-ro-query/README.md index 61b9c83..74042d2 100644 --- a/assets/dbx-ro-query/README.md +++ b/assets/dbx-ro-query/README.md @@ -43,6 +43,30 @@ python /skills/dbx-ro-query/scripts/dbx-ro-query.py \ See `/skills/dbx-ro-query/SKILL.md` for the full argument reference, output formats, supported SQL prefixes, and the rejection list. +## Troubleshooting + +### `databricks bundle init` fails with `has no profile configured` + +The CLI tries to resolve a Databricks profile during `bundle init` even though template installation does not need workspace auth. If your shell or IDE has `DATABRICKS_CONFIG_PROFILE` pointing at a profile that no longer exists in `~/.databrickscfg`, install fails with `Error: resolve: /.databrickscfg has no profile configured`. + +Workaround: run the install with the env var pointed at a valid profile (or `DEFAULT`): + +```bash +DATABRICKS_CONFIG_PROFILE=DEFAULT databricks bundle init https://github.com/vmariiechko/databricks-bundle-template \ + --template-dir assets/dbx-ro-query +``` + +### Codex sandbox blocks the GitHub URL fetch + +If `databricks bundle init ` fails inside Codex with `connectex: An attempt was made to access a socket in a way forbidden by its access permissions`, the Codex sandbox is blocking outbound network. Enable network access in `~/.codex/config.toml`: + +```toml +[sandbox_workspace_write] +network_access = true +``` + +Then restart Codex. See `/skills/dbx-ro-query/references/agent-codex.md` for the full Codex runtime checklist. + ## What this asset is A standalone sub-template in the [databricks-bundle-template](https://github.com/vmariiechko/databricks-bundle-template) asset library. 
It does not depend on the core template; it can be installed into any Databricks bundle, or any project at all that uses the Databricks CLI. See [ASSETS.md](../../ASSETS.md) for the full catalog. diff --git a/assets/dbx-ro-query/databricks_template_schema.json b/assets/dbx-ro-query/databricks_template_schema.json index 6d93c6c..9ad5196 100644 --- a/assets/dbx-ro-query/databricks_template_schema.json +++ b/assets/dbx-ro-query/databricks_template_schema.json @@ -12,7 +12,7 @@ } }, - "success_message": "\nSkill installed at '{{.target_dir}}/skills/dbx-ro-query/'.\n\nNext steps:\n\n1. Wire the skill into your agent. Pick the line that matches your setup:\n\n Claude Code\n - Auto-discovery folder is `.claude/skills/`. If you installed to `.claude` directly, you're done.\n - Otherwise add to CLAUDE.md:\n > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md when running read-only SQL against Databricks.\n\n Codex\n - Add to AGENTS.md:\n > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md for read-only Databricks SQL. Invoke `python {{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py \"\" \"\" \"\"`.\n\n Cursor\n - Add a rule under `.cursor/rules/` (or `.cursorrules`) referencing {{.target_dir}}/skills/dbx-ro-query/SKILL.md.\n\n Gemini CLI\n - Reference {{.target_dir}}/skills/dbx-ro-query/SKILL.md from your `.gemini/` configuration.\n\n Other / multi-agent\n - Point your agent at {{.target_dir}}/skills/dbx-ro-query/SKILL.md however your runtime expects.\n\n2. Smoke check the wrapper:\n python {{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py \"SELECT 1\" scalar\n", + "success_message": "\nSkill installed at '{{.target_dir}}/skills/dbx-ro-query/'.\n\nNext steps:\n\n1. Wire the skill into your agent. Pick the line that matches your setup:\n\n Claude Code\n - Auto-discovery folder is `.claude/skills/`. 
If you installed to `.claude` directly, you're done.\n - Otherwise add to CLAUDE.md:\n > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md when running read-only SQL against Databricks.\n - Cold-start tips: see {{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md.\n\n Codex\n - Add to AGENTS.md:\n > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md for read-only Databricks SQL. Invoke `python {{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py \"\" \"\" \"\"`.\n - Sandbox network and shell quirks: see {{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md.\n\n Cursor\n - Cursor does NOT auto-discover `.cursor/skills/`. Drop a rule at `.cursor/rules/dbx-ro-query.mdc` (the exact snippet is in {{.target_dir}}/skills/dbx-ro-query/references/agent-cursor.md).\n\n Gemini CLI\n - Reference {{.target_dir}}/skills/dbx-ro-query/SKILL.md from your `.gemini/` configuration.\n\n Other / multi-agent\n - Point your agent at {{.target_dir}}/skills/dbx-ro-query/SKILL.md however your runtime expects.\n\n2. Smoke check the wrapper:\n python {{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py \"SELECT 1\" scalar\n", "min_databricks_cli_version": "v0.296.0", "version": 1 diff --git a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/SKILL.md b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/SKILL.md index 91cb2ef..f1c64fd 100644 --- a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/SKILL.md +++ b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/SKILL.md @@ -72,7 +72,7 @@ python .agents/skills/dbx-ro-query/scripts/dbx-ro-query.py \ ## Operational notes - Never call `databricks bundle deploy` or `databricks bundle run` from this skill. The skill's purpose is read-only evidence; deployment is out of scope. -- If your shell runtime has a login/profile option, disable it for invocations of this script. In Codex, set `login: false` on `shell_command`. 
If output contains valid TSV/results followed by `oh-my-posh`, `Terminal-Icons`, `ResourceUnavailable`, or `Export-Clixml` errors, rerun with the parent shell's no-profile / non-login option. +- For agent-runtime-specific quirks (login chains, command timeouts, captured-output noise, exit-code visibility, sandbox auth), load the file under `references/` matching your runtime: `references/agent-claude-code.md`, `references/agent-codex.md`, etc. The notes below this bullet are generic and apply regardless of runtime. - If Databricks auth, token cache, or profile access fails because of sandbox restrictions, rerun the same command with the runtime's elevated / outside-sandbox execution mechanism. Do not call raw `databricks experimental aitools tools query` to work around the wrapper. - Use `python3` on Unix hosts if `python` does not point to Python 3. The script is Python 3.9+, no third-party dependencies. - Exit code is 0 on success, non-zero on validation failure or upstream CLI error. Stderr carries the upstream error message; stdout carries the formatted result. diff --git a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md new file mode 100644 index 0000000..67cb682 --- /dev/null +++ b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md @@ -0,0 +1,20 @@ +# Claude Code runtime tips + +Load this file on demand when running `dbx-ro-query` inside Claude Code. Skip it for other runtimes. + +## Bash tool default timeout (2 minutes) + +The `Bash` tool defaults to a 120 000 ms (2-minute) timeout. Cold-starting a Databricks SQL warehouse can exceed this: serverless warehouses typically take 30 to 60 seconds, classic warehouses 3 to 5 minutes. 
If the first invocation against a stopped warehouse times out, set the `Bash` tool's `timeout` parameter higher (for example 300 000 ms) for the warm-up call, then drop back to the default for subsequent queries. + +## Capturing rejections as parseable evidence + +Claude Code's harness already surfaces non-zero exit codes to the model when a `Bash` call fails, so you do not need any extra wiring to *detect* a wrapper rejection. You only need extra wiring to *embed* the exit code in the captured output text, which is useful when logging a rejection as machine-readable evidence in a report. + +For evidence captures, append `; echo "exit=$?"` to the invocation: + +```bash +python .agents/skills/dbx-ro-query/scripts/dbx-ro-query.py \ + "DROP TABLE foo" "" 2>&1; echo "exit=$?" +``` + +The captured output then ends with `exit=1` (or whatever the wrapper returned), which downstream consumers can grep without parsing the harness's structured error block. diff --git a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md new file mode 100644 index 0000000..47f4907 --- /dev/null +++ b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md @@ -0,0 +1,41 @@ +# Codex runtime tips + +Load this file on demand when running `dbx-ro-query` inside Codex. Skip it for other runtimes. + +## Sandbox network access + +If `databricks` calls fail with errors like `connectex: An attempt was made to access a socket in a way forbidden by its access permissions`, the Codex sandbox is blocking outbound network. Enable network access in `~/.codex/config.toml`: + +```toml +[sandbox_workspace_write] +network_access = true +``` + +Restart Codex after the config change. 
The same block can manifest as `databricks bundle init` failing against a GitHub URL or as auth/warehouse calls timing out; both clear once the sandbox is allowed to reach the network. + +## Disable the shell login chain for wrapper invocations + +If `shell_command` runs through a login shell, captured output may end up polluted with profile noise such as `oh-my-posh`, `Terminal-Icons`, `Set-PSReadLineOption`, `ResourceUnavailable`, or `Export-Clixml` errors. These appear *after* the wrapper's actual output, so the SQL result itself is correct but the captured text is hard to paste cleanly into evidence. + +Set `login: false` on `shell_command` for invocations of `dbx-ro-query.py`: + +```yaml +shell_command: + login: false +``` + +If you see a valid TSV or scalar result followed by such errors, rerun with `login: false` (or your local equivalent) to get clean captured output. + +## Exit codes are already visible + +Codex surfaces `Exit code: N` directly in `shell_command` results to the model. You do not need to append `; echo "exit=$?"` to detect failures. Only add it when you specifically want the exit code embedded in the captured output text for parseable evidence logs. + +Avoid wrapping the wrapper with PowerShell `Measure-Command`. It can hide the child process exit code from Codex. + +## Warehouse selection + +The CLI auto-detects an available warehouse. For deterministic test matrices across sessions or machines, set `DATABRICKS_WAREHOUSE_ID=<warehouse-id>` explicitly, either as an env var or via your shell config. The first call after a cold start typically takes 20-30 seconds; subsequent warm calls return in 2-3 seconds. + +## Per-command timeout + +Codex does not impose a fixed default; tune `timeout_ms` per call. `timeout_ms: 300000` (5 minutes) covers warehouse cold starts comfortably without making fast queries feel sluggish. 
diff --git a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-cursor.md b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-cursor.md new file mode 100644 index 0000000..e56d409 --- /dev/null +++ b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/references/agent-cursor.md @@ -0,0 +1,36 @@ +# Cursor runtime tips + +Load this file on demand when running `dbx-ro-query` inside Cursor (Composer or any IDE-resident agent). Skip it for other runtimes. Verified against Cursor 3.3.x with the Composer 2 model. + +## Wiring: rules live under `.cursor/rules/`, not `.cursor/skills/` + +Cursor does not auto-discover arbitrary markdown under `.cursor/skills/`. Project-rule discovery is `.cursor/rules/*.mdc`. After installing this asset with `target_dir: .cursor`, drop a rule file at `.cursor/rules/dbx-ro-query.mdc` so Composer surfaces the skill on every prompt. + +Suggested content for `.cursor/rules/dbx-ro-query.mdc`: + +````markdown +--- +description: Read-only Databricks SQL via the dbx-ro-query skill +alwaysApply: true +--- + +When you need to query Databricks (schema discovery, row sampling, aggregations, EXPLAIN — anything that reads but must not mutate), follow the skill at `.cursor/skills/dbx-ro-query/SKILL.md`. Invoke the bundled wrapper: + +``` +python .cursor/skills/dbx-ro-query/scripts/dbx-ro-query.py "" "" "" +``` + +Do not call `databricks experimental aitools tools query` directly; the wrapper enforces a read-only guard. +```` + +The legacy `.cursorrules` file at the project root may still work in some setups, but `.cursor/rules/` is Cursor's current documented layout. Use `alwaysApply: true` so the rule is in scope without the user having to invoke it manually. + +## Terminal runtime + +- Cursor's terminal tool surfaces stdout, stderr, and `exit_code` to the model on every command. You do not need to append `; echo "exit=$?"` to detect failures. 
+- First query against a stopped or cold warehouse takes a few seconds (warm-up). Subsequent queries on a warm warehouse return in ~2-3 seconds. No observed timeout in default Composer settings. +- Output capture is clean. No login-shell or status-decoration noise has been observed. + +## Profile mapping + +The `<profile>` argument passed to the wrapper is a Databricks CLI profile name from `~/.databrickscfg`, not the `DATABRICKS_CONFIG_PROFILE` env var. The two only intersect when the user has explicitly exported the env var. Pass the profile explicitly to keep behavior deterministic. diff --git a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py index 1d69874..53ae6f7 100644 --- a/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py +++ b/assets/dbx-ro-query/template/{{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py @@ -27,7 +27,6 @@ from collections.abc import Iterable from typing import Any - ALLOWED_FORMATS = ("auto", "scalar", "lines", "csv", "tsv", "json") # Only statements that start with one of these tokens are allowed through. diff --git a/tests/assets/test_dbx_ro_query.py b/tests/assets/test_dbx_ro_query.py index 32de5a6..4c44ac3 100644 --- a/tests/assets/test_dbx_ro_query.py +++ b/tests/assets/test_dbx_ro_query.py @@ -23,6 +23,9 @@ EXPECTED_FILES = ( "skills/dbx-ro-query/SKILL.md", "skills/dbx-ro-query/scripts/dbx-ro-query.py", + "skills/dbx-ro-query/references/agent-claude-code.md", + "skills/dbx-ro-query/references/agent-codex.md", + "skills/dbx-ro-query/references/agent-cursor.md", ) @@ -79,6 +82,26 @@ def test_skill_frontmatter_well_formed(installed: Path): assert "description:" in front, "SKILL.md frontmatter missing `description`" +def test_skill_references_pointer_present(installed: Path): + """SKILL.md operational notes must point readers at the references/ folder. 
+ + The references/ subfolder holds per-agent runtime tips (loaded on demand). + SKILL.md needs to surface this discovery hint so agents know to look there + when they hit a runtime quirk.""" + skill = installed / DEFAULT_TARGET / "skills" / "dbx-ro-query" / "SKILL.md" + text = skill.read_text(encoding="utf-8") + assert "references/" in text, "SKILL.md missing pointer to references/ folder" + + +def test_references_files_have_headings(installed: Path): + """Every references file must start with a Markdown H1 so agents loading + it on demand see a clear scope title.""" + refs_dir = installed / DEFAULT_TARGET / "skills" / "dbx-ro-query" / "references" + for ref in refs_dir.glob("agent-*.md"): + first_line = ref.read_text(encoding="utf-8").splitlines()[0] + assert first_line.startswith("# "), f"{ref.name} missing H1 heading on first line" + + @pytest.mark.parametrize( "sql", [