diff --git a/Makefile b/Makefile index cb6260af..44cf052e 100644 --- a/Makefile +++ b/Makefile @@ -5,14 +5,14 @@ apply: fmt: npx --loglevel error --yes prettier --write **/*.yml - uvx mdformat --wrap 80 --number *.md + uvx --with mdformat-gfm --with mdformat-frontmatter mdformat --wrap 80 --number **/*.md uvx ruff format --line-length=100 **/*.py uvx ruff check --fix --line-length=100 **/*.py npx --loglevel error --yes @johnnymorganz/stylua-bin -- **/*.lua fmt_check: npx --loglevel error --yes prettier --check **/*.yml - uvx mdformat --check --wrap 80 --number *.md + uvx --with mdformat-gfm --with mdformat-frontmatter mdformat --check --wrap 80 --number **/*.md uvx ruff format --check --line-length=100 **/*.py uvx ruff check --line-length=100 **/*.py npx --loglevel error --yes @johnnymorganz/stylua-bin --check -- **/*.lua diff --git a/ansible/dotfiles.yml b/ansible/dotfiles.yml index 1d4adc57..8e177817 100644 --- a/ansible/dotfiles.yml +++ b/ansible/dotfiles.yml @@ -65,6 +65,17 @@ - skills - setup + - name: Include agent hooks and Cursor rules deployment + ansible.builtin.include_tasks: + file: tasks/agent_hooks.yml + apply: + tags: + - hooks + - setup + tags: + - hooks + - setup + - name: Include paperless setup tasks include_tasks: tasks/paperless.yml tags: diff --git a/ansible/tasks/agent_hooks.yml b/ansible/tasks/agent_hooks.yml new file mode 100644 index 00000000..5e8f9023 --- /dev/null +++ b/ansible/tasks/agent_hooks.yml @@ -0,0 +1,32 @@ +--- +- name: Ensure agent config directories exist + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: "0755" + loop: + - "{{ ansible_user_dir }}/.claude" + - "{{ ansible_user_dir }}/.cursor" + tags: + - hooks + - setup + +- name: Link Claude settings (global prompt hooks) + ansible.builtin.file: + src: "{{ dotfiles_dir }}/claude/settings.json" + dest: "{{ ansible_user_dir }}/.claude/settings.json" + state: link + force: true + tags: + - hooks + - setup + +- name: Link Cursor rules + ansible.builtin.file: 
+    src: "{{ dotfiles_dir }}/cursor/rules"
+    dest: "{{ ansible_user_dir }}/.cursor/rules"
+    state: link
+    force: true
+  tags:
+    - hooks
+    - setup
diff --git a/ansible/tasks/neovim.yml b/ansible/tasks/neovim.yml
index efb9fd62..dd437b3c 100644
--- a/ansible/tasks/neovim.yml
+++ b/ansible/tasks/neovim.yml
@@ -9,6 +9,9 @@
       - debugpy
       - isort
       - mdformat
+      # Keep the mdformat plugins in sync with the Makefile fmt/fmt_check
+      # targets; without mdformat-gfm, `--wrap 80` reflows GFM tables.
+      - mdformat-frontmatter
+      - mdformat-gfm
       - neovim
       - pynvim
       - ruff
diff --git a/claude/hooks/dbt-rules.sh b/claude/hooks/dbt-rules.sh
new file mode 100755
index 00000000..c65afca7
--- /dev/null
+++ b/claude/hooks/dbt-rules.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+# PostToolUse(Write|Edit) hook: inject dbt conventions (cursor/rules/dbt.mdc)
+# when a .sql/.yml file inside a dbt project (dbt_project.yml ancestor) is
+# edited. Claude Code has no equivalent of Cursor's glob-scoped rule loading,
+# so this mirrors it: full rule on the first qualifying edit per session,
+# then a one-line reminder of the load-bearing rules on later edits — cheap
+# insurance against context compaction without re-paying ~700 tokens per
+# edit.
+ +set -euo pipefail + +input=$(cat) +file_path=$(printf '%s' "$input" | jq -r '.tool_input.file_path // empty') + +case "$file_path" in + *.sql | *.yml) ;; + *) exit 0 ;; +esac + +dir=$(dirname "$file_path") +found="" +while [ -n "$dir" ] && [ "$dir" != "/" ]; do + if [ -f "$dir/dbt_project.yml" ]; then + found=1 + break + fi + dir=$(dirname "$dir") +done +[ -n "$found" ] || exit 0 + +emit() { + jq -n --arg ctx "$1" \ + '{hookSpecificOutput: {hookEventName: "PostToolUse", additionalContext: $ctx}}' +} + +session_id=$(printf '%s' "$input" | jq -r '.session_id // empty') +marker="" +case "$session_id" in + "" | *[!A-Za-z0-9_-]*) ;; + *) marker="${TMPDIR:-/tmp}/claude-dbt-rules-${session_id}" ;; +esac + +if [ -n "$marker" ] && [ -e "$marker" ]; then + emit "Reminder — dbt conventions apply (full rules injected earlier this session): every model has a uniqueness test on its declared grain; a failing grain test is a finding, NEVER fixed by widening the key or deduplicating — stop and report the join that fanned out; all dedup (DISTINCT/QUALIFY/row_number) must be called for in the plan; marts carry a reconciliation test against a number the model does not control." + exit 0 +fi + +script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) +dbt_rule="$script_dir/../../cursor/rules/dbt.mdc" + +if [ ! -f "$dbt_rule" ]; then + echo "dbt-rules hook: missing dbt rule at $dbt_rule" >&2 + exit 1 +fi + +# Strip the YAML frontmatter (between the first two `---` lines). +rule_body=$(awk 'BEGIN{f=0} /^---$/{f++; next} f>=2{print}' "$dbt_rule") + +prompt=$( + cat < +$rule_body + +PROMPT +) + +if [ -n "$marker" ]; then + touch "$marker" +fi +emit "$prompt" diff --git a/claude/hooks/plan-review.sh b/claude/hooks/plan-review.sh new file mode 100755 index 00000000..d14d53cf --- /dev/null +++ b/claude/hooks/plan-review.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# PostToolUse(Write|Edit) hook: review plan documents. 
+# Path-gates first so non-plan edits exit silently, then emits an audit +# prompt wrapping the canonical planning rule (cursor/rules/planning.mdc) +# as PostToolUse additionalContext JSON — plain stdout at exit 0 never +# reaches the model. The full rule (~300 tokens) is injected on every +# plan edit: a once-per-session marker would go stale after context +# compaction, leaving the model auditing against a rubric it no longer +# has. + +set -euo pipefail + +input=$(cat) +file_path=$(printf '%s' "$input" | jq -r '.tool_input.file_path // empty') + +case "$file_path" in + */.cursor/plans/* | */.claude/plans/*) ;; + *) exit 0 ;; +esac + +emit() { + jq -n --arg ctx "$1" \ + '{hookSpecificOutput: {hookEventName: "PostToolUse", additionalContext: $ctx}}' +} + +script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) +planning_rule="$script_dir/../../cursor/rules/planning.mdc" + +if [ ! -f "$planning_rule" ]; then + echo "plan-review hook: missing planning rule at $planning_rule" >&2 + exit 1 +fi + +# Strip the YAML frontmatter (between the first two `---` lines). +rule_body=$(awk 'BEGIN{f=0} /^---$/{f++; next} f>=2{print}' "$planning_rule") + +prompt=$( + cat < below. The gates are: + +1. Exit criteria +2. Invariants +3. Failure modes (including the one-line premortem) +4. Assumptions & unknowns +5. Outside view +6. Minimal viable change +7. Visualization confirmation (only when the plan involves figures) + +A gate passes ONLY if you can quote the plan line(s) that satisfy it. +Paraphrases, "implied somewhere," or "this is obvious" do NOT count. +If you are unsure whether a quote satisfies a gate, the gate FAILS. + + +$rule_body + + +## Audit output + +Output exactly one line per gate, in the order listed above: + +\`: PASS — ""\` +\`: FAIL — \` +\`: N/A — \` + +If there are no FAIL lines, end with \`PLAN OK\` on its own line and +begin implementing. Otherwise stop and ask the user to fill the gaps +before implementing. 
+PROMPT +) + +emit "$prompt" diff --git a/claude/hooks/stop-check.sh b/claude/hooks/stop-check.sh new file mode 100755 index 00000000..e4de0c26 --- /dev/null +++ b/claude/hooks/stop-check.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# Stop hook: nag about build/render when, in the current turn: +# - a .sql file was edited inside a dbt project (dbt_project.yml ancestor), or +# - a .qmd file was edited (any location — quarto renders standalone files) +# AND no build/render command was run this turn. +# +# Reads the per-session state file written by track-tool-use.sh instead of +# parsing the transcript. A nag is delivered as {"decision": "block", +# "reason": ...} — the only output Claude Code feeds back to the model. +# +# Robustness invariants (this is a reminder, not an enforcement gate): +# - Blocks at most ONCE per turn: when stop_hook_active is set (Claude is +# already continuing because of this hook), clear state and allow the +# stop unconditionally. We deliberately do not re-check whether the +# build actually ran — re-checking is how agents get stuck in loops. +# - Every error path fails open (exit 0, no block): missing/malformed +# state file or fields, jq failures, unreadable input. 
+ +set -uo pipefail # no -e: error paths must fall through to allow the stop + +input=$(cat) || exit 0 +session_id=$(printf '%s' "$input" | jq -r '.session_id // empty' 2>/dev/null) || exit 0 +[ -z "$session_id" ] && exit 0 +case "$session_id" in + *[!A-Za-z0-9_-]*) exit 0 ;; +esac + +state_file="${TMPDIR:-/tmp}/claude-turn-${session_id}.jsonl" + +stop_hook_active=$(printf '%s' "$input" | jq -r '.stop_hook_active // false' 2>/dev/null) || stop_hook_active=false +if [ "$stop_hook_active" = "true" ]; then + rm -f "$state_file" + exit 0 +fi + +[ -f "$state_file" ] || exit 0 + +edits=$(jq -r 'select(.tool == "Write" or .tool == "Edit") | .path // empty' \ + "$state_file" 2>/dev/null | grep -E '\.(sql|qmd)$') || edits="" + +if [ -z "$edits" ]; then + rm -f "$state_file" + exit 0 +fi + +# Walk up from a directory looking for any of the given marker filenames. +# Returns 0 if any marker is found, 1 otherwise. +has_marker() { + local dir="$1" + shift + while [ -n "$dir" ] && [ "$dir" != "/" ]; do + for marker in "$@"; do + [ -f "$dir/$marker" ] && return 0 + done + dir=$(dirname "$dir") + done + return 1 +} + +dbt_files="" +quarto_files="" + +while IFS= read -r path; do + [ -z "$path" ] && continue + case "$path" in + *.sql) + if has_marker "$(dirname "$path")" dbt_project.yml; then + dbt_files="${dbt_files}${path}"$'\n' + fi + ;; + *.qmd) + quarto_files="${quarto_files}${path}"$'\n' + ;; + esac +done <<<"$edits" + +if [ -z "$dbt_files" ] && [ -z "$quarto_files" ]; then + rm -f "$state_file" + exit 0 +fi + +bashes=$(jq -r 'select(.tool == "Bash") | .cmd // empty' "$state_file" 2>/dev/null) || bashes="" + +if printf '%s' "$bashes" | grep -qE '(just (build|html|render)|dbt (build|run|test)|quarto render)'; then + rm -f "$state_file" + exit 0 +fi + +# Keep the state file: the post-block stop is allowed via stop_hook_active, +# which also cleans it up. 
+reason=$( + echo "You edited build-relevant files this turn but did not run a build/render command:" + if [ -n "$dbt_files" ]; then + echo + echo "dbt project files (dbt_project.yml ancestor found):" + printf '%s' "$dbt_files" | sed 's/^/ /' + echo " → run \`dbt build\` (or \`just build\` if the project has a justfile target)." + fi + if [ -n "$quarto_files" ]; then + echo + echo "quarto documents:" + printf '%s' "$quarto_files" | sed 's/^/ /' + echo " → run \`quarto render \` (or \`just html\`/\`just render\` if the project has it)." + fi + echo + echo "Run the build/render now; if it fails, fix the failures before stopping, and report the result either way." + echo "If a build is genuinely not applicable (e.g. the edit was comment-only or the file was deleted later in the turn), say why in one line instead." +) + +jq -n --arg reason "$reason" '{decision: "block", reason: $reason}' 2>/dev/null || true +exit 0 diff --git a/claude/hooks/track-tool-use.sh b/claude/hooks/track-tool-use.sh new file mode 100755 index 00000000..a1127f5b --- /dev/null +++ b/claude/hooks/track-tool-use.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# PostToolUse(Write|Edit|Bash) hook: append this turn's tool uses to a +# per-session state file so the Stop hook (stop-check.sh) can see which +# files were edited and which commands ran without parsing the transcript. +# Never blocks anything: no output, always exits 0. + +set -uo pipefail + +input=$(cat) || exit 0 +session_id=$(printf '%s' "$input" | jq -r '.session_id // empty' 2>/dev/null) || exit 0 +[ -z "$session_id" ] && exit 0 + +# Session id becomes part of a filename; accept only safe characters. 
+case "$session_id" in
+  *[!A-Za-z0-9_-]*) exit 0 ;;
+esac
+
+printf '%s' "$input" | jq -c '{
+  tool: .tool_name,
+  path: (.tool_input.file_path // null),
+  cmd: (.tool_input.command // null)
+}' >>"${TMPDIR:-/tmp}/claude-turn-${session_id}.jsonl" 2>/dev/null || true
+
+exit 0
diff --git a/claude/settings.json b/claude/settings.json
new file mode 100644
index 00000000..bf317b4c
--- /dev/null
+++ b/claude/settings.json
@@ -0,0 +1,53 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(uv *)",
+      "Bash(just *)",
+      "Bash(duckdb *)"
+    ],
+    "defaultMode": "auto"
+  },
+  "hooks": {
+    "PostToolUse": [
+      {
+        "matcher": "Write|Edit",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "$HOME/.dotfiles/claude/hooks/plan-review.sh",
+            "timeout": 5
+          },
+          {
+            "type": "command",
+            "command": "$HOME/.dotfiles/claude/hooks/dbt-rules.sh",
+            "timeout": 5
+          }
+        ]
+      },
+      {
+        "matcher": "Write|Edit|Bash",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "$HOME/.dotfiles/claude/hooks/track-tool-use.sh",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "Stop": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "$HOME/.dotfiles/claude/hooks/stop-check.sh",
+            "timeout": 5
+          }
+        ]
+      }
+    ]
+  },
+  "tui": "fullscreen",
+  "theme": "auto",
+  "skipAutoPermissionPrompt": true
+}
diff --git a/cursor/README.md b/cursor/README.md
new file mode 100644
index 00000000..58891a83
--- /dev/null
+++ b/cursor/README.md
@@ -0,0 +1,59 @@
+# Cursor and Claude agent configuration
+
+User-level **prompt hooks** (via Claude settings) and **Cursor rules** apply
+across all projects when deployed.
+
+## Deployment
+
+Symlink into your home directory:
+
+```bash
+ln -sf ~/.dotfiles/claude/settings.json ~/.claude/settings.json
+ln -sfn ~/.dotfiles/cursor/rules ~/.cursor/rules # -n: replace an existing link, don't nest inside it
+```
+
+Or run Ansible with the `hooks` or `setup` tag:
+
+```bash
+./apply_ansible hooks
+```
+
+### Cursor: enable third-party hooks
+
+Hooks live in `~/.claude/settings.json` (Claude Code format).
Cursor loads them +when **Settings → Features → Third-party skills** is enabled. + +Project-level `.cursor/hooks.json` in a repo can still add project-specific +hooks; global hooks come from `~/.claude/settings.json`. + +## What's included + +### Hooks (`~/.claude/settings.json`) + +| Hook | Event | What it does | +| ------------------------------------ | ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Plan quality gate (`plan-review.sh`) | `PostToolUse` (Write/Edit) | When a plan under `.cursor/plans/` or `.claude/plans/` is edited, injects an audit prompt (via `additionalContext`) wrapping the gates defined in `planning.mdc` (single source of truth), with a per-gate PASS/FAIL/N-A verdict contract ending in `PLAN OK` when clean. The full rule is injected on every plan edit — a once-per-session marker would go stale after context compaction. | +| dbt rules (`dbt-rules.sh`) | `PostToolUse` (Write/Edit) | When a `.sql`/`.yml` file inside a dbt project (`dbt_project.yml` ancestor) is edited, injects `dbt.mdc` (via `additionalContext`) — Claude Code's equivalent of Cursor's glob-scoped rule loading. Full rule on the first qualifying edit per session; later edits get a one-line reminder of the load-bearing rules (grain test, no repair-loop, plan-sanctioned dedup, reconciliation). | +| Turn tracker (`track-tool-use.sh`) | `PostToolUse` (Write/Edit/Bash) | Appends edited file paths and Bash commands to a per-session state file in `$TMPDIR` so the Stop hook knows what happened this turn without parsing the transcript. No output. 
| +| Verify build (`stop-check.sh`) | `Stop` | If `.sql` files inside a dbt project or `.qmd` files were edited this turn and no build/render command ran, blocks the stop once (`decision: block`) with a reminder to run `dbt build` / `quarto render`. Loop-safe by design: allows the stop unconditionally when `stop_hook_active` is set (max one nag per turn) and fails open on any error. | + +dbt layer boundaries and other SQL conventions are enforced via **rules** +(`dbt.mdc`), not hooks. + +Hook output follows the **Claude Code** JSON contract +(`hookSpecificOutput.additionalContext`, `decision: block`); plain stdout from a +hook never reaches the model. Cursor's own hook protocol differs — these scripts +are written against Claude Code semantics. + +### Rules (`~/.cursor/rules/`) + +| Rule | Scope | What it does | +| -------------- | ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `planning.mdc` | Always applied | Plan structure (exit criteria, invariants, failure modes + premortem, assumptions & unknowns, outside view), minimal viable change, visualization confirmation | +| `dbt.mdc` | `*.sql`, `*.yml` | Layer boundaries, grain docstrings, testing conventions, anti-patterns | + +## Project-specific extensions + +Add `.cursor/rules/` or `.cursor/hooks.json` in a project repo for +domain-specific policy. Project hooks override user hooks where Cursor merges +configs. diff --git a/cursor/rules/dbt.mdc b/cursor/rules/dbt.mdc new file mode 100644 index 00000000..8f61257d --- /dev/null +++ b/cursor/rules/dbt.mdc @@ -0,0 +1,67 @@ +--- +description: dbt modelling conventions +globs: "*.sql,*.yml" +alwaysApply: false +--- + +## Layer boundaries + +``` +staging -> intermediate -> mart/core (fct/dim) -> mart/ (consumer views) +``` + +- Staging models do minimal transformation (type casting, renaming). 
+- Intermediate models contain business logic. They reference staging or + other intermediates. **Intermediates NEVER reference mart models.** +- mart/core models (`fct_`, `dim_`) assemble intermediates into + business-facing tables with minimal additional logic. +- mart/ (top-level) models are thin views joining core models for specific + consumers. Consumers should reference these, not core tables directly. +- If a staging model is getting complex, push logic to an intermediate. + +## Grain and docstrings + +- Each model's SQL docstring starts with `Grain: ...` describing what one + row represents. +- The grain comes from the plan (operator-reviewed), not from inspecting + what the SQL happens to produce. Docstring, uniqueness test, and plan + must all state the same grain. +- Avoid stating implementation details obvious from the SQL itself. + +## Testing + +- Models should always have data tests: at minimum `not_null` and `unique` + on columns where expected. +- Every model has a uniqueness test on its declared grain (single-column + `unique` or `dbt_utils.unique_combination_of_columns`). +- A failing grain/uniqueness test is a finding, not a bug to fix. NEVER + make it pass by widening the key, adding `DISTINCT`/`QUALIFY`, or + deduplicating. Stop, identify which join produced the duplicates, and + report it to the operator. +- Marts should carry at least one reconciliation test tying a row count or + total to a number the model does not control: an upstream entity count + (e.g. rows == `count(distinct sample_id)` in the source) or a known + external figure. +- Use `dbt_utils` and `dbt_expectations` for integrity tests. +- When an ID column is a hash of dimension columns, add + `dbt_expectations.expect_column_distinct_count_to_equal` with `value: 1` + and `group_by` on those dimension columns. +- Catch data integrity issues as early as possible in the DAG. + +## Anti-patterns + +- Never hardcode thresholds as magic numbers. 
Compute dynamically from the + data or reference upstream constants. +- `SELECT DISTINCT` as a band-aid for duplicates is a code smell. Fix the + grain or join that produced them. +- All deduplication (`DISTINCT`, `QUALIFY`, `row_number() = 1`) must be + called for in the plan. Unplanned dedup is treated as hidden join + fan-out until shown otherwise. +- `SELECT *` should be avoided (redundant in DuckDB, hides column changes + elsewhere). +- Avoid unnecessary `CROSS JOIN`s that produce redundant zero-filled rows. + +## Verification + +- After modifying any `.sql` or `.yml` model file, run the project's build + command and confirm success before reporting completion. diff --git a/cursor/rules/planning.mdc b/cursor/rules/planning.mdc new file mode 100644 index 00000000..d549cffc --- /dev/null +++ b/cursor/rules/planning.mdc @@ -0,0 +1,50 @@ +--- +description: Planning and task execution rules +alwaysApply: true +--- + +## Planning + +Every plan MUST include these sections (bullets, reviewable in under 60 seconds): + +- **Exit criteria**: How we know the task is done. Always includes: + - Build succeeds (e.g. `just build`, `dbt build`) + - Rendered output is correct if applicable (e.g. `quarto render`) + - For refactors: before/after data snapshots with expected zero-row diffs + - For new/changed dbt models: a uniqueness test on the declared grain passes +- **Invariants**: What must not change. Examples: + - "row counts in table X unchanged" + - "no new test failures" + - "downstream consumers produce identical output" + - For every new or changed dbt model: its grain, stated as "one row per …". + The grain is a spec the operator reviews, not something the implementation + discovers — if the built model disagrees with the declared grain, the + declaration wins and the implementation is wrong. +- **Failure modes**: What could go wrong. 
Examples: + - "join fan-out from a 1:N relationship" + - "grain change in intermediate breaks downstream" + - "database lock from stale process" + - **Premortem** (one line): "It is 3 months later and this failed because …" (top 2–3 concrete reasons) +- **Assumptions & unknowns**: What we treat as true but have not verified; what this plan does *not* cover; what would invalidate the plan ("if X is wrong, we stop") +- **Outside view**: Reference class — how similar work in this repo/class usually goes; what typically breaks; buffer vs best-case estimate (e.g. "add 2× for grain changes") + +### Minimal viable change + +Propose the simplest design that works. One new column on an existing table +before a new table. One new table before a new model chain. Let the operator +request more complexity. + +### Confirm before building visualizations + +Before writing code for any figure, state and confirm: + (a) chart type, + (b) data universe / source table, + (c) x-axis metric, + (d) y-axis metric, + (e) grouping / faceting. +If uncertain about any of these, ask. Do not guess. + +### Plan format + +Plans should be concise and scannable. Use bullet points, not paragraphs. +The operator should be able to review a plan in under 60 seconds. diff --git a/dbt/dbt_deep_analysis.md b/dbt/dbt_deep_analysis.md index fbc10d47..257ab779 100644 --- a/dbt/dbt_deep_analysis.md +++ b/dbt/dbt_deep_analysis.md @@ -1,70 +1,89 @@ -You have access to a duckdb database. You are auditing a dbt model for data quality, correctness, and best practices. Interrogate the database to validate every claim you make — do not speculate without running a query first. +You have access to a duckdb database. You are auditing a dbt model for data +quality, correctness, and best practices. Interrogate the database to validate +every claim you make — do not speculate without running a query first. ## Audit checklist Work through each section. For every finding, run a query to confirm it. ### 1. 
Schema & types -- Are column types appropriate (e.g. dates stored as DATE not VARCHAR, monetary values as DECIMAL not FLOAT)? -- Are there implicit casts in joins or WHERE clauses that could silently drop rows or change values? + +- Are column types appropriate (e.g. dates stored as DATE not VARCHAR, monetary + values as DECIMAL not FLOAT)? +- Are there implicit casts in joins or WHERE clauses that could silently drop + rows or change values? - Do any columns contain mixed types or unexpected NULLs? ### 2. Join correctness -- Is every join relationship correct (1:1, 1:N, M:N)? Run a query: does the join **fan out** (produce more rows than the driving table)? -- Are there orphaned rows (LEFT JOIN misses)? What fraction of rows have NULL foreign keys after the join? -- Are join keys unique on the side that should be unique? Query `COUNT(*) vs COUNT(DISTINCT key)`. + +- Is every join relationship correct (1:1, 1:N, M:N)? Run a query: does the join + **fan out** (produce more rows than the driving table)? +- Are there orphaned rows (LEFT JOIN misses)? What fraction of rows have NULL + foreign keys after the join? +- Are join keys unique on the side that should be unique? Query + `COUNT(*) vs COUNT(DISTINCT key)`. ### 3. Filters & business logic -- Are there WHERE / HAVING filters that could silently exclude valid records (e.g. filtering on a column that is sometimes NULL)? -- Is there business logic (CASE statements, date arithmetic, aggregations) that could produce wrong results on edge cases? + +- Are there WHERE / HAVING filters that could silently exclude valid records + (e.g. filtering on a column that is sometimes NULL)? +- Is there business logic (CASE statements, date arithmetic, aggregations) that + could produce wrong results on edge cases? - Are date boundaries inclusive/exclusive as intended? ### 4. Grain & uniqueness -- What is the intended grain of this model? Verify with `COUNT(*) vs COUNT(DISTINCT )`. + +- What is the intended grain of this model? 
Verify with + `COUNT(*) vs COUNT(DISTINCT )`. - Could the model produce duplicate rows under any upstream data condition? ### 5. Data quality -- What percentage of each column is NULL? Flag any column where the NULL rate is suspicious. -- Are there unexpected duplicate values, negative numbers, future dates, or empty strings where there shouldn't be? -- Do value distributions look reasonable (run MIN, MAX, AVG, percentiles for numeric columns)? + +- What percentage of each column is NULL? Flag any column where the NULL rate is + suspicious. +- Are there unexpected duplicate values, negative numbers, future dates, or + empty strings where there shouldn't be? +- Do value distributions look reasonable (run MIN, MAX, AVG, percentiles for + numeric columns)? ### 6. Performance & best practices + - Are there SELECT * or unnecessary columns being carried through? - Could CTEs be simplified or combined? -- Are there window functions that could be replaced with simpler aggregations, or vice versa? -- Is the model incremental where it should be, or full-refresh where incremental would be better? +- Are there window functions that could be replaced with simpler aggregations, + or vice versa? +- Is the model incremental where it should be, or full-refresh where incremental + would be better? ### 7. Test coverage gaps -{{#if existing_tests}} -The following dbt tests are already defined for this model: -{{existing_tests}} + +{{#if existing_tests}} The following dbt tests are already defined for this +model: {{existing_tests}} Identify what is NOT covered by existing tests. Focus recommendations on gaps. -{{/if}} -{{^if existing_tests}} -No dbt tests were found for this model. Recommend the most important tests to add. -{{/if}} +{{/if}} {{^if existing_tests}} No dbt tests were found for this model. Recommend +the most important tests to add. {{/if}} ### 8. 
Upstream dependency risks -{{#if lineage}} -Model lineage (immediate upstream/downstream): -{{lineage}} -Consider: if an upstream model delivers late, delivers duplicates, or changes its grain, how does this model behave? Are there defensive checks? -{{/if}} +{{#if lineage}} Model lineage (immediate upstream/downstream): {{lineage}} + +Consider: if an upstream model delivers late, delivers duplicates, or changes +its grain, how does this model behave? Are there defensive checks? {{/if}} ## Context ### Compiled SQL + {{compiled_sql}} ### Sample rows + {{sample_rows}} ### Data profile -{{#if data_profile}} -{{data_profile}} -{{/if}} + +{{#if data_profile}} {{data_profile}} {{/if}} ## Output format diff --git a/dbt/dbt_quick_analysis.md b/dbt/dbt_quick_analysis.md index 6eb4511e..391998b9 100644 --- a/dbt/dbt_quick_analysis.md +++ b/dbt/dbt_quick_analysis.md @@ -1,4 +1,4 @@ Output the complete SQL file with inline comments added as SQL comments (-- ). -Add brief comments suggesting improvements, potential issues, or best-practice violations. -Where appropriate, include a short explanation of why the suggestion matters. -Output ONLY the SQL with comments, no markdown fences, no preamble. +Add brief comments suggesting improvements, potential issues, or best-practice +violations. Where appropriate, include a short explanation of why the suggestion +matters. Output ONLY the SQL with comments, no markdown fences, no preamble. diff --git a/llm/PROMPT.md b/llm/PROMPT.md deleted file mode 100644 index 6cbf6c7b..00000000 --- a/llm/PROMPT.md +++ /dev/null @@ -1,13 +0,0 @@ -# Background - -Hello, for this session you are a world class software engineer, with pragmatic -sensibilities. We are working on a project I need your assistance with updates -or debugging. I will provide background in this message for you and then in the -following message provide context of the problem I would like to work on today. 
-For you first message I would like you to just respond with "understood". After -my next message where I outline what I want to work on today I would like you -to ask questions, and in response you will provide a technical spec what we're -planning to work on today. Then only when I write "build" you will implement -the solution. Please remember to add detailed comments and explanations -throughout your generated code. - diff --git a/nvim/README.md b/nvim/README.md index 185280b0..f5c479d6 100644 --- a/nvim/README.md +++ b/nvim/README.md @@ -1,4 +1,4 @@ # 💤 LazyVim -A starter template for [LazyVim](https://github.com/LazyVim/LazyVim). -Refer to the [documentation](https://lazyvim.github.io/installation) to get started. +A starter template for [LazyVim](https://github.com/LazyVim/LazyVim). Refer to +the [documentation](https://lazyvim.github.io/installation) to get started. diff --git a/nvim/ftplugin/quarto.lua b/nvim/ftplugin/quarto.lua index fd04efa4..b63182c6 100644 --- a/nvim/ftplugin/quarto.lua +++ b/nvim/ftplugin/quarto.lua @@ -10,6 +10,15 @@ vim.b.completion = false -- don't run vim ftplugin on top vim.api.nvim_buf_set_var(0, "did_ftplugin", true) +-- LSP attaches after the ftplugin and re-sets formatexpr; clear it again so +-- gq uses vim's built-in paragraph reflower instead of the LSP code formatter. +vim.api.nvim_create_autocmd("LspAttach", { + buffer = 0, + callback = function() + vim.opt_local.formatexpr = "" + end, +}) + -- markdown vs. 
quarto hacks local ns = vim.api.nvim_create_namespace("QuartoHighlight") vim.api.nvim_set_hl(ns, "@markup.strikethrough", { strikethrough = false }) diff --git a/nvim/lua/plugins/language.lua b/nvim/lua/plugins/language.lua index f8912b05..b064ff34 100644 --- a/nvim/lua/plugins/language.lua +++ b/nvim/lua/plugins/language.lua @@ -41,12 +41,16 @@ return { "stevearc/conform.nvim", opts = { formatters_by_ft = { - quarto = { "injected" }, + -- injected formats code cells via treesitter language injections; + -- prettier then hard-wraps prose. prettier is used over mdformat for + -- quarto because it preserves {{< >}} shortcodes, which mdformat + -- escapes. Caveat: ::: div content written without surrounding blank + -- lines gets joined into one paragraph by both tools. + quarto = { "injected", "prettier" }, sql = { "sqlfmt" }, markdown = { "mdformat" }, vimwiki = { "mdformat" }, }, - -- Configure mdformat with text wrapping and formatting options formatters = { mdformat = { prepend_args = { @@ -54,27 +58,42 @@ return { "--number", -- Use numbered lists consistently }, }, - }, - -- See: - -- https://github.com/jmbuhr/quarto-nvim-kickstarter/blob/382b050e13eada7180ad048842386be37e820660/lua/plugins/editing.lua#L29-L81 - injected = { - -- Set the options field - options = { - -- Set to true to ignore errors - ignore_errors = false, - -- Map of treesitter language to file extension - -- A temporary file name with this extension will be generated during formatting - -- because some formatters care about the filename. 
- lang_to_ext = { - bash = "sh", - latex = "tex", - markdown = "md", - python = "py", - vimwiki = "md", + prettier = { + prepend_args = { "--prose-wrap", "always", "--print-width", "80" }, + options = { + -- prettier can't infer a parser from the .qmd extension + ft_parsers = { quarto = "markdown" }, + }, + }, + -- Use the ansible-managed venv ruff rather than mason's, which sits + -- first on PATH inside nvim but lags behind: preserving quarto's `#|` + -- cell-option comments (instead of rewriting them to `# |`) needs + -- ruff >= 0.15.17. + ruff_format = { + command = vim.fn.expand("~/.venvs/nvim/bin/ruff"), + }, + -- See: + -- https://github.com/jmbuhr/quarto-nvim-kickstarter/blob/382b050e13eada7180ad048842386be37e820660/lua/plugins/editing.lua#L29-L81 + injected = { + options = { + ignore_errors = false, + -- Map of treesitter language to file extension + -- A temporary file name with this extension will be generated during formatting + -- because some formatters care about the filename. + lang_to_ext = { + bash = "sh", + latex = "tex", + markdown = "md", + python = "py", + vimwiki = "md", + }, + -- Code cells need an explicit formatter entry here: python files + -- are normally formatted by the ruff LSP, which the injected + -- formatter cannot call. + lang_to_formatters = { + python = { "ruff_format" }, + }, }, - -- Map of treesitter language to formatters to use - -- (defaults to the value from formatters_by_ft) - lang_to_formatters = {}, }, }, }, diff --git a/skills/explain.md b/skills/explain.md index 1cefbef1..1cf8bcfe 100644 --- a/skills/explain.md +++ b/skills/explain.md @@ -1,6 +1,6 @@ --- name: explain -description: > +description: >- Use this skill when the user wants to understand a file — whether they ask to "explain," "walk me through," "what does this do," or simply point at a file and ask about it. 
Produces a two-part report: an intuitive overview of @@ -26,7 +26,8 @@ Given a file path, read the file and produce a report with exactly two sections: ### Part 1 — Intuitive Explanation -Explain what this file does as if describing it to a colleague who understands software but has no context on this project. Cover: +Explain what this file does as if describing it to a colleague who understands +software but has no context on this project. Cover: - What problem it solves or what role it plays - How it fits into the broader system @@ -41,6 +42,8 @@ Walk through the implementation with precision. Cover: - Structure: key functions, classes, or sections and how they relate - Data flow: what comes in, what goes out, what gets transformed - Dependencies: what it relies on and what relies on it -- Notable decisions: non-obvious implementation choices, trade-offs, or constraints +- Notable decisions: non-obvious implementation choices, trade-offs, or + constraints -Reference specific line numbers and identifiers. Do not restate Part 1 in technical language — add new information. +Reference specific line numbers and identifiers. Do not restate Part 1 in +technical language — add new information. diff --git a/tmux/cheatsheet.md b/tmux/cheatsheet.md index 5d8b30c1..21ab00bc 100644 --- a/tmux/cheatsheet.md +++ b/tmux/cheatsheet.md @@ -2,56 +2,56 @@ Prefix key: `Ctrl-a` -All window and tab management is handled by tmux. Ghostty's `Cmd-T` and -`Cmd-N` are unbound to avoid conflicts. +All window and tab management is handled by tmux. Ghostty's `Cmd-T` and `Cmd-N` +are unbound to avoid conflicts. 
## tmux (prefix = Ctrl-a) ### Sessions -| Keys | Action | -| --- | --- | -| `prefix d` | Detach from session | -| `prefix s` | List / switch sessions | -| `prefix $` | Rename session | -| `tmux new -s name` | New named session | +| Keys | Action | +| ------------------ | ---------------------- | +| `prefix d` | Detach from session | +| `prefix s` | List / switch sessions | +| `prefix $` | Rename session | +| `tmux new -s name` | New named session | ### Windows (tabs inside tmux) -| Keys | Action | -| --- | --- | -| `prefix c` | New window | -| `prefix ,` | Rename window | -| `prefix n` / `prefix p` | Next / previous window | -| `prefix 1-9` | Jump to window by number | -| `prefix Ctrl-a` | Toggle last window | -| `prefix &` | Kill window | +| Keys | Action | +| ----------------------- | ------------------------ | +| `prefix c` | New window | +| `prefix ,` | Rename window | +| `prefix n` / `prefix p` | Next / previous window | +| `prefix 1-9` | Jump to window by number | +| `prefix Ctrl-a` | Toggle last window | +| `prefix &` | Kill window | ### Panes (splits inside a window) -| Keys | Action | -| --- | --- | -| `prefix i` | Split horizontal | -| `prefix u` | Split vertical | +| Keys | Action | +| -------------- | ----------------------------------- | +| `prefix i` | Split horizontal | +| `prefix u` | Split vertical | | `Ctrl-h/j/k/l` | Navigate panes (vim-tmux-navigator) | -| `prefix z` | Zoom / unzoom pane | -| `prefix x` | Kill pane | -| `prefix Space` | Enter copy mode (vi keys) | +| `prefix z` | Zoom / unzoom pane | +| `prefix x` | Kill pane | +| `prefix Space` | Enter copy mode (vi keys) | ### Copy mode (vi) -| Keys | Action | -| --- | --- | -| `/` | Search forward | -| `?` | Search backward | -| `v` | Begin selection | -| `y` | Yank selection | -| `q` | Exit copy mode | +| Keys | Action | +| ---- | --------------- | +| `/` | Search forward | +| `?` | Search backward | +| `v` | Begin selection | +| `y` | Yank selection | +| `q` | Exit copy mode | ## Ghostty -| 
Keys | Action | -| --- | --- | +| Keys | Action | +| ----------------- | ------------- | | `Cmd-+` / `Cmd--` | Zoom in / out | -| `Cmd-Shift-,` | Open config | -| `Cmd-K` | Clear screen | +| `Cmd-Shift-,` | Open config | +| `Cmd-K` | Clear screen |