31 changes: 31 additions & 0 deletions .agents/skills/ship-it/SKILL.md
@@ -0,0 +1,31 @@
---
name: ship-it
description: "Creates a GitHub PR using jj and gh. Use when asked to ship, create a PR, or push changes."
---

# Ship It

Reviews and ships the current changes as a GitHub PR.

## Workflow

1. Gather context:
- Run `jj status` to see current status
- Run `jj diff -f main` to see the diff from main
- Run `jj log -r main::` to see commits from main

2. **Review the changes** before proceeding:
- Code quality and best practices
- Potential bugs or issues
- Performance considerations
- Memory safety (leaks, double-frees, proper use of `errdefer`)
- Test coverage

Use the repository's AGENTS.md for guidance on style and conventions. If you find any issues, stop and report them to the user instead of creating the PR.

3. If the commit description is empty, add an appropriate one using `jj describe -m "Commit description"`
4. Create a new empty change using `jj new`
5. Push the change to upstream using `jj git push -c @-`
6. Get the autogenerated bookmark name (should be something like `push-*`) using `jj bookmark list -r 'main+::'`
7. Create a PR using the GitHub CLI (`gh pr create`)
8. Enable auto-merge with `gh pr merge <PR_NUMBER> --auto --squash`
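
The numbered workflow above can be sketched end-to-end as a shell script. This is a hypothetical dry run: the `run` helper echoes each command instead of executing it, so the sequence can be illustrated without `jj` or `gh` installed (the PR number is a placeholder — in practice it comes from the `gh pr create` output):

```shell
#!/usr/bin/env bash
set -euo pipefail

# Dry-run helper: print each command instead of executing it.
run() { printf '+ %s\n' "$*"; }

# 1. Gather context
run jj status
run jj diff -f main
run jj log -r 'main::'

# 3-5. Describe the change, start a new empty change, push the previous one
run jj describe -m "Commit description"
run jj new
run jj git push -c @-

# 6. Find the autogenerated push-* bookmark
run jj bookmark list -r 'main+::'

# 7-8. Create the PR and enable auto-merge (123 is a placeholder PR number)
run gh pr create
run gh pr merge 123 --auto --squash
```

Replacing `run` with direct invocation turns the sketch into the real workflow.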
30 changes: 0 additions & 30 deletions .claude/commands/create-pr.md

This file was deleted.

15 changes: 0 additions & 15 deletions .claude/settings.json

This file was deleted.

1 change: 0 additions & 1 deletion CLAUDE.md

This file was deleted.

50 changes: 50 additions & 0 deletions README.md
@@ -107,3 +107,53 @@ Supported logical types:
- **Temporal**: `Date`, `TimeMillis`, `TimeMicros`, `TimeNanos`, `TimestampMillis`, `TimestampMicros`, `TimestampNanos`
- **Numeric**: `Int8`, `UInt8`, `Int16`, `UInt16`, `UInt32`, `UInt64`, `Float16`, `Decimal`
- **Other**: `UUID`, `String`, `Enum`, `Json`, `Bson`

### Schema Inspection

After reading a file, you can inspect its schema through `file.metadata.schema`, which is an array of `SchemaElement` entries. The first element is always the root; the rest describe individual fields.

**Listing all fields and their types:**
```zig
for (file.metadata.schema[1..]) |elem| {
std.debug.print("name={s} type={any} repetition={any} logical={any}\n", .{
elem.name,
elem.type,
elem.repetition_type,
elem.logicalType,
});
}
```

Each `SchemaElement` exposes:
- `name` — field name
- `type` — physical type (`BOOLEAN`, `INT32`, `INT64`, `INT96`, `FLOAT`, `DOUBLE`, `BYTE_ARRAY`, `FIXED_LEN_BYTE_ARRAY`)
- `repetition_type` — `REQUIRED`, `OPTIONAL`, or `REPEATED`
- `logicalType` — logical type (`STRING`, `DATE`, `TIMESTAMP`, `DECIMAL`, `UUID`, `MAP`, `LIST`, etc.)
- `converted_type` — legacy converted type
- `num_children` — non-null for group (struct/nested) elements
- `type_length` — byte width for `FIXED_LEN_BYTE_ARRAY`
- `scale` / `precision` — for decimal types

**Looking up a column by name:**
```zig
const info = file.findSchemaElement(&.{"fare_amount"}).?;
// info.column_index — index to pass to readColumn / readColumnDynamic
// info.max_definition_level — for nullable columns
// info.max_repetition_level — for repeated (list) columns
// info.elem — the SchemaElement with full type info
```

For nested schemas, pass the full path:
```zig
const nested = file.findSchemaElement(&.{ "address", "city" }).?;
```

**File-level metadata** is also available:
```zig
std.debug.print("version: {d}\n", .{file.metadata.version});
std.debug.print("num_rows: {d}\n", .{file.metadata.num_rows});
std.debug.print("row_groups: {d}\n", .{file.metadata.row_groups.len});
if (file.metadata.created_by) |created_by| {
std.debug.print("created_by: {s}\n", .{created_by});
}
```
60 changes: 60 additions & 0 deletions scripts/download-public-datasets.sh
@@ -75,6 +75,65 @@ download_clickbench() {
fi
}

# =============================================================================
# TPC-H SF1 Dataset
# Generated using DuckDB's TPC-H extension
# =============================================================================
download_tpch() {
local mode="$1"
local dest="$DEST_DIR/tpch-sf1"
mkdir -p "$dest"

echo "=== TPC-H SF1 Dataset ==="

local small_tables=("nation" "region" "supplier")
local big_tables=("lineitem" "orders" "customer" "part" "partsupp")

local need_generate=false
for table in "${small_tables[@]}"; do
if [[ ! -f "$dest/$table.parquet" ]]; then
need_generate=true
break
fi
done

if [[ "$mode" == "all" ]]; then
for table in "${big_tables[@]}"; do
if [[ ! -f "$dest/$table.parquet" ]]; then
need_generate=true
break
fi
done
fi

if [[ "$need_generate" == "false" ]]; then
echo " All required TPC-H files already exist"
return
fi

local tables_to_generate=""
if [[ "$mode" == "all" ]]; then
tables_to_generate="nation region supplier lineitem orders customer part partsupp"
else
tables_to_generate="nation region supplier"
fi

echo " Generating TPC-H SF1 data via DuckDB..."
uvx --from "duckdb" --with pyarrow python -c "
import os
import duckdb

con = duckdb.connect()
con.execute('INSTALL tpch; LOAD tpch; CALL dbgen(sf=1)')
for table in '${tables_to_generate}'.split():
    dest = '${dest}/' + table + '.parquet'
    if not os.path.exists(dest):
        print(f' Generating: {table}.parquet')
        con.execute(f\"COPY {table} TO '{dest}' (FORMAT PARQUET)\")
    else:
        print(f' Already exists: {table}.parquet')
"
}

# =============================================================================
# Add more datasets here following the same pattern
# =============================================================================
@@ -116,6 +175,7 @@ mkdir -p "$DEST_DIR"

download_nyc_taxi "$MODE"
download_clickbench "$MODE"
download_tpch "$MODE"

echo ""
echo "Done!"
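
The `# Add more datasets here following the same pattern` comment marks the script's extension point. A new dataset follows the same shape as `download_tpch`: make the destination directory, check whether the files already exist, and only fetch or generate when they are missing. A minimal hypothetical template (the `download_example` name and `example.parquet` file are placeholders, and the commented-out `curl` stands in for whatever fetch step the dataset needs):

```shell
#!/usr/bin/env bash
set -euo pipefail

download_example() {
    local mode="$1"
    local dest="$DEST_DIR/example"
    mkdir -p "$dest"

    echo "=== Example Dataset ==="

    # Idempotency check: skip work if the file is already present.
    if [[ -f "$dest/example.parquet" ]]; then
        echo "  Already exists: example.parquet"
        return
    fi

    echo "  Fetching: example.parquet"
    # curl -L -o "$dest/example.parquet" "$EXAMPLE_URL"  # real fetch goes here
}
```

Hooking it in is one line next to the existing calls at the bottom of the script: `download_example "$MODE"`.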