diff --git a/CHANGELOG.md b/CHANGELOG.md index 3611d94..601b6d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/). ## [Unreleased] +## [1.7.1] - 2026-05-13 + +### Fixed +- **Asset `dbx-ro-query` stdout/stderr encoding**: `configure_text_streams()` reconfigures both output streams to UTF-8 with `errors="replace"` at startup, preventing `charmap` codec errors when query results contain non-ASCII characters (Greek, Cyrillic, emoji, etc.). + +### Changed +- **Asset `dbx-ro-query` install message**: added a "Set your warehouse ID" step to the post-install `success_message`. First-time users now see the `databricks warehouses list` lookup command and the `DATABRICKS_WAREHOUSE_ID` export pattern immediately after wiring, before the smoke-check step. + ## [1.7.0] - 2026-05-10 ### Added diff --git a/assets/dbx-ro-query/databricks_template_schema.json b/assets/dbx-ro-query/databricks_template_schema.json index 9ad5196..f3489e2 100644 --- a/assets/dbx-ro-query/databricks_template_schema.json +++ b/assets/dbx-ro-query/databricks_template_schema.json @@ -12,7 +12,7 @@ } }, - "success_message": "\nSkill installed at '{{.target_dir}}/skills/dbx-ro-query/'.\n\nNext steps:\n\n1. Wire the skill into your agent. Pick the line that matches your setup:\n\n Claude Code\n - Auto-discovery folder is `.claude/skills/`. If you installed to `.claude` directly, you're done.\n - Otherwise add to CLAUDE.md:\n > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md when running read-only SQL against Databricks.\n - Cold-start tips: see {{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md.\n\n Codex\n - Add to AGENTS.md:\n > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md for read-only Databricks SQL. 
Invoke `python {{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py \"\" \"\" \"\"`.\n - Sandbox network and shell quirks: see {{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md.\n\n Cursor\n - Cursor does NOT auto-discover `.cursor/skills/`. Drop a rule at `.cursor/rules/dbx-ro-query.mdc` (the exact snippet is in {{.target_dir}}/skills/dbx-ro-query/references/agent-cursor.md).\n\n Gemini CLI\n - Reference {{.target_dir}}/skills/dbx-ro-query/SKILL.md from your `.gemini/` configuration.\n\n Other / multi-agent\n - Point your agent at {{.target_dir}}/skills/dbx-ro-query/SKILL.md however your runtime expects.\n\n2. Smoke check the wrapper:\n python {{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py \"SELECT 1\" scalar\n", + "success_message": "\nSkill installed at '{{.target_dir}}/skills/dbx-ro-query/'.\n\nNext steps:\n\n1. Wire the skill into your agent. Pick the line that matches your setup:\n\n Claude Code\n - Auto-discovery folder is `.claude/skills/`. If you installed to `.claude` directly, you're done.\n - Otherwise add to CLAUDE.md:\n > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md when running read-only SQL against Databricks.\n - Cold-start tips: see {{.target_dir}}/skills/dbx-ro-query/references/agent-claude-code.md.\n\n Codex\n - Add to AGENTS.md:\n > Use the skill at {{.target_dir}}/skills/dbx-ro-query/SKILL.md for read-only Databricks SQL. Invoke `python {{.target_dir}}/skills/dbx-ro-query/scripts/dbx-ro-query.py \"\" \"\" \"\"`.\n - Sandbox network and shell quirks: see {{.target_dir}}/skills/dbx-ro-query/references/agent-codex.md.\n\n Cursor\n - Cursor does NOT auto-discover `.cursor/skills/`. 
def configure_text_streams() -> None:
    """Switch ``sys.stdout`` and ``sys.stderr`` to UTF-8 with ``errors="replace"``.

    Intended to run once at startup so that printing non-ASCII text does not
    fail with a codec error on consoles whose default encoding cannot
    represent it.

    The call is best-effort and never raises for an unusable stream:

    * a stream without a ``reconfigure`` attribute (``None``, or a
      replacement object such as ``io.StringIO``) is skipped;
    * a stream whose ``reconfigure`` call fails (for example a closed or
      detached text wrapper) is left as it is.
    """
    for stream in (sys.stdout, sys.stderr):
        reconfigure = getattr(stream, "reconfigure", None)
        if reconfigure is None:
            continue
        try:
            reconfigure(encoding="utf-8", errors="replace")
        except (ValueError, OSError):
            # reconfigure() flushes first: ValueError signals a closed or
            # detached wrapper, OSError covers io.UnsupportedOperation and
            # flush failures. An encoding tweak must not abort the program.
            continue
def test_format_rows_unknown_format_rejects(script_module):
    """format_rows must raise SystemExit for the unsupported "yaml" format."""
    with pytest.raises(SystemExit):
        script_module.format_rows([{"a": 1}], "yaml")


def test_configure_text_streams_runs_without_error(script_module):
    """configure_text_streams must not raise even when stdout is a pytest capture object."""
    script_module.configure_text_streams()


def test_configure_text_streams_switches_streams_to_utf8(script_module, monkeypatch):
    """configure_text_streams must leave both streams able to emit non-ASCII text.

    The real streams are swapped for ASCII-encoded wrappers over in-memory
    buffers, so the test observes the reconfiguration itself and does not
    touch the streams pytest is capturing.
    """
    import io
    import sys

    raw_out, raw_err = io.BytesIO(), io.BytesIO()
    monkeypatch.setattr(sys, "stdout", io.TextIOWrapper(raw_out, encoding="ascii"))
    monkeypatch.setattr(sys, "stderr", io.TextIOWrapper(raw_err, encoding="ascii"))

    script_module.configure_text_streams()

    for stream, raw in ((sys.stdout, raw_out), (sys.stderr, raw_err)):
        assert stream.encoding == "utf-8"
        assert stream.errors == "replace"
        # Writing Greek text to an ASCII stream would raise UnicodeEncodeError.
        stream.write("αβγδ")
        stream.flush()
        assert raw.getvalue().decode("utf-8") == "αβγδ"


def test_format_rows_non_ascii_scalar(script_module):
    """format_rows returns non-ASCII characters correctly in scalar mode."""
    out = script_module.format_rows([{"value": "αβγδ"}], "scalar")
    assert out == "αβγδ"


def test_format_rows_non_ascii_tsv(script_module):
    """format_rows handles non-ASCII column values in TSV output."""
    rows = [{"city": "Αθήνα", "code": "ATH"}, {"city": "Ηράκλειο", "code": "HER"}]
    out = script_module.format_rows(rows, "tsv")
    assert "city\tcode" in out
    assert "Αθήνα" in out
    assert "Ηράκλειο" in out